From bc4b531f54cb622caf2079792408a5ee29b01fe3 Mon Sep 17 00:00:00 2001
From: Joey Eamigh <55670930+JoeyEamigh@users.noreply.github.com>
Date: Thu, 18 Apr 2024 18:05:57 -0400
Subject: [PATCH] Add per-genre mean Gross script; write genres model summary

---
 Justfile                |  4 ++++
 busi410project/fit.py   |  5 +----
 busi410project/means.py | 47 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 52 insertions(+), 4 deletions(-)
 create mode 100644 busi410project/means.py

diff --git a/Justfile b/Justfile
index a000fe6..bc9f416 100644
--- a/Justfile
+++ b/Justfile
@@ -1,3 +1,7 @@
 fit:
   @echo "Fitting the model"
   @python busi410project/fit.py
+
+means:
+  @echo "Calculating the means"
+  @python busi410project/means.py
\ No newline at end of file
diff --git a/busi410project/fit.py b/busi410project/fit.py
index 9ff1594..93b08fd 100644
--- a/busi410project/fit.py
+++ b/busi410project/fit.py
@@ -40,9 +40,6 @@ model = sm.OLS(y, X)
 results = model.fit()
 summary = results.summary()
 
-# print(results.pvalue)
-print(results.conf_int_el(0))
 
-
-with open("./out/model_summary_cast.csv", "w") as f:
+with open("./out/model_summary_genres.csv", "w") as f:
     f.write(summary.as_csv())
diff --git a/busi410project/means.py b/busi410project/means.py
new file mode 100644
index 0000000..d08906c
--- /dev/null
+++ b/busi410project/means.py
@@ -0,0 +1,47 @@
+import os
+import numpy as np
+import pandas as pd
+from categorize import categorize
+
+
+# Make sure the output directory exists (no error if it is already there).
+os.makedirs("./out", exist_ok=True)
+
+# Load ./out/categoricals.csv when it exists; otherwise call categorize().
+if not os.path.exists("./out/categoricals.csv"):
+    df = categorize()
+else:
+    df = pd.read_csv("./out/categoricals.csv", index_col=0)
+
+# Columns that each may hold one of a row's genre labels.
+to_average = [
+    "Genre 1",
+    "Genre 2",
+    "Genre 3",
+]
+
+# Make each genre column categorical so its distinct labels can be listed.
+for column in to_average:
+    df[column] = pd.Categorical(df[column])
+
+# Sorted, de-duplicated labels that appear in any of the genre columns.
+genres = np.unique(
+    np.concatenate([df[column].cat.categories for column in to_average])
+)
+
+
+# For each genre, average Gross over the rows that list that genre in any of
+# the genre columns (a row with several genres counts toward each of them).
+genre_means = pd.DataFrame(index=genres, columns=["Gross"])
+for genre in genres:
+    genre_means.loc[genre] = df.loc[
+        df[to_average].eq(genre).any(axis=1), "Gross"
+    ].mean()
+
+# Drop genres whose mean is NaN, then list the highest mean Gross first.
+genre_means.dropna(inplace=True)
+genre_means.sort_values("Gross", ascending=False, inplace=True)
+
+# Print the table and write it to ./out/genre_means.csv.
+print(genre_means)
+genre_means.to_csv("./out/genre_means.csv")