final lol

This commit is contained in:
Joey Eamigh
2024-04-18 18:05:57 -04:00
parent 45f293f75c
commit bc4b531f54
3 changed files with 52 additions and 4 deletions

View File

@@ -1,3 +1,7 @@
# Neither target produces a file named after itself; declare them phony so a
# stray file or directory called "fit" or "means" cannot make them look up to date.
.PHONY: fit means

# Run the model-fitting script.
fit:
	@echo "Fitting the model"
	@python busi410project/fit.py

# Run the genre-means script.
means:
	@echo "Calculating the means"
	@python busi410project/means.py

View File

@@ -40,9 +40,6 @@ model = sm.OLS(y, X)
results = model.fit()
summary = results.summary()
# print(results.pvalue)
print(results.conf_int_el(0))
with open("./out/model_summary_cast.csv", "w") as f:
with open("./out/model_summary_genres.csv", "w") as f:
f.write(summary.as_csv())

47
busi410project/means.py Normal file
View File

@@ -0,0 +1,47 @@
import os
import numpy as np
import pandas as pd
from categorize import categorize
# Genre columns whose values are pooled when averaging.
to_average = [
    "Genre 1",
    "Genre 2",
    "Genre 3",
]


def genre_gross_means(
    df: pd.DataFrame, genre_columns=None, value_column: str = "Gross"
) -> pd.DataFrame:
    """Return the mean of *value_column* for every genre, highest mean first.

    A row contributes to a genre when any of *genre_columns* equals that
    genre, so a row that lists the same genre in several columns is still
    counted only once. Genres whose mean is NaN are dropped.

    Args:
        df: Table holding the genre columns and *value_column*.
        genre_columns: Names of the columns to pool; defaults to
            ``to_average``.
        value_column: Name of the numeric column to average.

    Returns:
        A DataFrame indexed by genre with a single float column named
        *value_column*, sorted in descending order.
    """
    columns = list(to_average if genre_columns is None else genre_columns)
    genre_frame = df[columns]

    # Collect every distinct non-null label across the pooled columns.
    labels = pd.unique(genre_frame.to_numpy().ravel())
    genres = sorted(label for label in labels if pd.notna(label))

    # An explicit float dtype keeps the result numeric even when there are
    # no genres at all (an empty mapping would otherwise become object).
    means = pd.Series(
        {
            genre: df.loc[genre_frame.eq(genre).any(axis=1), value_column].mean()
            for genre in genres
        },
        dtype="float64",
    )
    return means.dropna().sort_values(ascending=False).to_frame(value_column)


def main() -> None:
    """Print the per-genre mean gross and write it to ./out/genre_means.csv."""
    os.makedirs("./out", exist_ok=True)

    if os.path.exists("./out/categoricals.csv"):
        df = pd.read_csv("./out/categoricals.csv", index_col=0)
    else:
        # No categoricals.csv on disk: fall back to the project's categorize().
        df = categorize()

    genre_means = genre_gross_means(df)
    print(genre_means)
    genre_means.to_csv("./out/genre_means.csv")


if __name__ == "__main__":
    main()