final lol
This commit is contained in:
4
Justfile
4
Justfile
@@ -1,3 +1,7 @@
|
|||||||
fit:
|
fit:
|
||||||
@echo "Fitting the model"
|
@echo "Fitting the model"
|
||||||
@python busi410project/fit.py
|
@python busi410project/fit.py
|
||||||
|
|
||||||
|
means:
|
||||||
|
@echo "Calculating the means"
|
||||||
|
@python busi410project/means.py
|
||||||
@@ -40,9 +40,6 @@ model = sm.OLS(y, X)
|
|||||||
results = model.fit()
|
results = model.fit()
|
||||||
summary = results.summary()
|
summary = results.summary()
|
||||||
|
|
||||||
# print(results.pvalue)
|
|
||||||
print(results.conf_int_el(0))
|
|
||||||
|
|
||||||
|
with open("./out/model_summary_genres.csv", "w") as f:
|
||||||
with open("./out/model_summary_cast.csv", "w") as f:
|
|
||||||
f.write(summary.as_csv())
|
f.write(summary.as_csv())
|
||||||
|
|||||||
47
busi410project/means.py
Normal file
47
busi410project/means.py
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
import os
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
from categorize import categorize
|
||||||
|
|
||||||
|
|
||||||
|
# Script body: compute the mean "Gross" per genre and write it to
# ./out/genre_means.csv (also printed to stdout).

# Make sure the output directory exists. exist_ok=True replaces the separate
# existence check and avoids the check-then-create race.
os.makedirs("./out", exist_ok=True)

# Load the table from ./out/categoricals.csv when it is present; otherwise
# obtain it from categorize().
if not os.path.exists("./out/categoricals.csv"):
    df = categorize()
else:
    df = pd.read_csv("./out/categoricals.csv", index_col=0)

# Genre columns that are pooled together when averaging "Gross".
to_average = [
    "Genre 1",
    "Genre 2",
    "Genre 3",
]

# Convert each genre column to a categorical so its distinct labels are
# available through `.cat.categories`.
for column in to_average:
    df[column] = pd.Categorical(df[column])

# Sorted, de-duplicated union of the labels found in any genre column.
genres = np.unique(
    np.concatenate([df[column].cat.categories for column in to_average])
)

# combine all genre columns into one, and take the mean of the Gross for each genre
genre_means = pd.DataFrame(index=genres, columns=["Gross"])

# The column selection does not depend on the genre, so take it once.
genre_columns = df[to_average]
for genre in genres:
    # A row contributes once to a genre if ANY genre column equals it, so a
    # row listing the same genre in two columns is not double-counted.
    in_genre = genre_columns.eq(genre).any(axis=1)
    genre_means.loc[genre] = df[in_genre]["Gross"].mean()

# Drop genres whose mean is NaN, then rank from highest to lowest mean gross.
genre_means.dropna(inplace=True)
genre_means.sort_values("Gross", ascending=False, inplace=True)

print(genre_means)
genre_means.to_csv("./out/genre_means.csv")
|
||||||
Reference in New Issue
Block a user