From bc4b531f54cb622caf2079792408a5ee29b01fe3 Mon Sep 17 00:00:00 2001
From: Joey Eamigh <55670930+JoeyEamigh@users.noreply.github.com>
Date: Thu, 18 Apr 2024 18:05:57 -0400
Subject: [PATCH] final lol

---
Review notes (everything between the three-dash line and the first file
header is ignored by `git am`):

* Line breaks and indentation were rebuilt from a whitespace-collapsed copy
  of this patch. The Justfile recipe indent (2 spaces assumed) and the blank
  context lines in fit.py are reconstructions - verify them against the
  tree before applying.
* means.py was amended during review: the output directory is created with
  exist_ok=True instead of a check-then-create pair, the previously unused
  to_average list now drives every genre-column reference, and Gross is
  built as a numeric column instead of an object column.
* The Justfile now ends with a newline.
* Because of the two content changes above, the post-image blob ids on the
  "index" lines for Justfile and means.py still describe the original commit.

 Justfile                |  4 ++++
 busi410project/fit.py   |  5 +----
 busi410project/means.py | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 55 insertions(+), 4 deletions(-)
 create mode 100644 busi410project/means.py

diff --git a/Justfile b/Justfile
index a000fe6..bc9f416 100644
--- a/Justfile
+++ b/Justfile
@@ -1,3 +1,7 @@
 fit:
   @echo "Fitting the model"
   @python busi410project/fit.py
+
+means:
+  @echo "Calculating the means"
+  @python busi410project/means.py
diff --git a/busi410project/fit.py b/busi410project/fit.py
index 9ff1594..93b08fd 100644
--- a/busi410project/fit.py
+++ b/busi410project/fit.py
@@ -40,9 +40,6 @@ model = sm.OLS(y, X)
 results = model.fit()
 summary = results.summary()
 
-# print(results.pvalue)
-print(results.conf_int_el(0))
-
-with open("./out/model_summary_cast.csv", "w") as f:
+with open("./out/model_summary_genres.csv", "w") as f:
     f.write(summary.as_csv())
 
diff --git a/busi410project/means.py b/busi410project/means.py
new file mode 100644
index 0000000..d08906c
--- /dev/null
+++ b/busi410project/means.py
@@ -0,0 +1,50 @@
+"""Write the mean Gross per genre to ./out/genre_means.csv."""
+
+import os
+
+import numpy as np
+import pandas as pd
+from categorize import categorize
+
+
+# exist_ok avoids the check-then-create race of os.path.exists + os.makedirs.
+os.makedirs("./out", exist_ok=True)
+
+# Load the categoricals CSV when it exists; otherwise build the frame.
+if not os.path.exists("./out/categoricals.csv"):
+    df = categorize()
+else:
+    df = pd.read_csv("./out/categoricals.csv", index_col=0)
+
+# Columns holding a genre label; a row counts towards every genre it lists.
+to_average = [
+    "Genre 1",
+    "Genre 2",
+    "Genre 3",
+]
+
+for column in to_average:
+    df[column] = pd.Categorical(df[column])
+
+genres = np.unique(
+    np.concatenate([df[column].cat.categories for column in to_average])
+)
+
+# Mean Gross of the rows that list the genre in any genre column. Building the
+# frame from a list keeps Gross numeric instead of the object dtype that
+# filling an empty frame cell by cell produces.
+genre_means = pd.DataFrame(
+    {
+        "Gross": [
+            df.loc[df[to_average].eq(genre).any(axis=1), "Gross"].mean()
+            for genre in genres
+        ]
+    },
+    index=genres,
+)
+
+genre_means.dropna(inplace=True)
+genre_means.sort_values("Gross", ascending=False, inplace=True)
+
+print(genre_means)
+genre_means.to_csv("./out/genre_means.csv")