final lol

2024-04-18 18:05:57 -04:00
parent 45f293f75c
commit bc4b531f54
3 changed files with 52 additions and 4 deletions
--- a/4
+++ b/4
@@ -1,3 +1,7 @@
 # Fit the model: announce the step, then run busi410project/fit.py.
 # The leading "@" keeps make from echoing each command line before running it.
 # NOTE(review): no .PHONY declaration for `fit` or `means` is visible in this
 # hunk — confirm whether one exists or should be added.
 fit:
  @echo "Fitting the model"
  @python busi410project/fit.py
 # Calculate the means: announce the step, then run busi410project/means.py.
 means:
  @echo "Calculating the means"
  @python busi410project/means.py
--- a/busi410project/fit.py
+++ b/busi410project/fit.py
@@ -40,9 +40,6 @@ model = sm.OLS(y, X)
 results = model.fit()
 summary = results.summary()
 # print(results.pvalue)
 print(results.conf_int_el(0))
-
+with open("./out/model_summary_genres.csv", "w") as f:
 with open("./out/model_summary_cast.csv", "w") as f:
    f.write(summary.as_csv())
--- a/busi410project/means.py
+++ b/busi410project/means.py
@@ -0,0 +1,47 @@
 import os
 import numpy as np
 import pandas as pd
 from categorize import categorize
 # Create the output directory if it is missing. exist_ok=True folds the
 # existence check into the call instead of a separate check-then-create step.
 os.makedirs("./out", exist_ok=True)
 # Load ./out/categoricals.csv when it exists; otherwise build the frame with
 # categorize().
 if not os.path.exists("./out/categoricals.csv"):
    df = categorize()
 else:
    df = pd.read_csv("./out/categoricals.csv", index_col=0)
 # Genre columns pooled together when averaging "Gross". This list is the single
 # source of the column names used below.
 to_average = [
    "Genre 1",
    "Genre 2",
    "Genre 3",
 ]
 # Convert each genre column to a categorical so its distinct labels are
 # available through .cat.categories.
 for column in to_average:
    df[column] = pd.Categorical(df[column])
 # Every distinct genre label that occurs in any of the genre columns.
 genres = np.unique(
    np.concatenate([df[column].cat.categories for column in to_average])
 )
 # Combine all genre columns into one view and take the mean of "Gross" for each
 # genre. A row is selected when the genre appears in at least one genre column,
 # so it contributes once per genre even if the label repeats across columns.
 genre_columns = df[to_average]  # loop-invariant, so selected once up front
 genre_means = pd.DataFrame(index=genres, columns=["Gross"])
 for genre in genres:
    genre_means.loc[genre] = df[genre_columns.eq(genre).any(axis=1)]["Gross"].mean()
 # Drop genres whose mean is missing, then order from highest to lowest mean.
 genre_means.dropna(inplace=True)
 genre_means.sort_values("Gross", ascending=False, inplace=True)
 print(genre_means)
 genre_means.to_csv("./out/genre_means.csv")