Spaces:
Sleeping
Sleeping
Deploy gradio movie revenue app with model and preprocessing
Browse files- app.py +123 -0
- components/__init__.py +7 -0
- components/__pycache__/__init__.cpython-310.pyc +0 -0
- components/__pycache__/examples.cpython-310.pyc +0 -0
- components/__pycache__/inputs.cpython-310.pyc +0 -0
- components/__pycache__/prediction.cpython-310.pyc +0 -0
- components/examples.py +83 -0
- components/inputs.py +158 -0
- components/prediction.py +92 -0
- model/final_model.pkl +3 -0
- requirements.txt +4 -0
- src/__pycache__/preprocess.cpython-310.pyc +0 -0
- src/preprocess.py +157 -0
app.py
ADDED
|
@@ -0,0 +1,123 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Movie Box Office Revenue Predictor - Gradio Web Application."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
from typing import Any
|
| 7 |
+
|
| 8 |
+
import gradio as gr
|
| 9 |
+
|
| 10 |
+
from components import create_input_form, get_example_data, predict_revenue_from_form
|
| 11 |
+
from src.preprocess import load_model, parse_feature_options
|
| 12 |
+
|
| 13 |
+
# Configuration
MODEL_PATH = Path("model/final_model.pkl")

# Global state, filled in once at import time by the loader below.
MODEL: Any | None = None  # loaded estimator, or None when loading failed
MODEL_ERROR: str | None = None  # text of the exception that prevented loading
FEATURE_OPTIONS: dict[str, list[str]] = {}  # dropdown choices parsed from the model's feature names

# Initialize model.
# The globals are only published when BOTH steps succeed, so MODEL is never left
# set while MODEL_ERROR reports a failure (e.g. the pickle loads but the object
# has no ``feature_names_in_`` attribute).
try:
    _loaded_model = load_model(MODEL_PATH)
    _loaded_options = parse_feature_options(list(_loaded_model.feature_names_in_))
except Exception as exc:  # startup boundary: keep the module importable and report the error
    # Some exceptions stringify to "", so fall back to the exception type name.
    MODEL_ERROR = str(exc) or type(exc).__name__
else:
    MODEL = _loaded_model
    FEATURE_OPTIONS = _loaded_options
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def build_app() -> gr.Blocks:
    """Build and configure the Gradio interface.

    The page consists of a header, a model-load error banner (only when the
    module-level ``MODEL`` is ``None``), and two columns: the input form,
    action buttons and examples on the left, the prediction output on the
    right.

    Returns:
        The assembled ``gr.Blocks`` app; launching it is left to the caller.
    """
    with gr.Blocks(
        title="🎬 Movie Revenue Predictor",
    ) as app:

        # Header
        with gr.Row():
            gr.Markdown(
                """
                # 🎬 Movie Box Office Revenue Predictor

                Predict movie revenue using machine learning trained on historical box office data.
                Enter movie details below and get instant revenue predictions with profitability analysis.
                """,
                elem_classes=["header"]
            )

        # Model status
        # gr.Warning is an alert meant to be raised from inside an event
        # handler; called here, while the layout is being built, it never
        # reaches the page. Render the error as a visible Markdown banner.
        if MODEL is None:
            gr.Markdown(f"⚠️ Model loading error: {MODEL_ERROR}")

        # Main content
        with gr.Row():
            with gr.Column(scale=3):
                # Input form (only the ordered component list is needed here)
                _, input_list = create_input_form(FEATURE_OPTIONS)

                # Action buttons
                with gr.Row():
                    predict_btn = gr.Button(
                        "🎯 Predict Revenue",
                        variant="primary",
                        scale=2,
                        size="lg"
                    )
                    gr.ClearButton(
                        components=input_list,
                        value="🔄 Clear",
                        scale=1,
                        size="lg"
                    )

                # Examples
                gr.Markdown("### 📝 Quick Examples")
                gr.Examples(
                    examples=get_example_data(FEATURE_OPTIONS),
                    inputs=input_list,
                    label="Click an example to auto-fill the form",
                )

            with gr.Column(scale=2):
                gr.Markdown("### 📊 Prediction Results")

                # Output displays
                prediction_output = gr.Markdown(
                    "💡 Fill in the form and click **Predict Revenue** to see results.",
                    elem_classes=["output-box"]
                )

                profitability_output = gr.Markdown(
                    "",
                    elem_classes=["output-box"]
                )

        # Event handlers
        # The form values arrive positionally in input_list order; the model
        # is prepended so the handler receives (model, *values).
        predict_btn.click(
            fn=lambda *args: predict_revenue_from_form(MODEL, *args),
            inputs=input_list,
            outputs=[prediction_output, profitability_output],
        )

    return app
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
def main():
    """Launch the application.

    Builds the UI with ``build_app`` and serves it on all interfaces
    (0.0.0.0) at port 7860 without a public share link, using a zinc/slate
    variant of the default Gradio theme.
    """
    palette = {
        "primary_hue": "zinc",
        "secondary_hue": "slate",
        "neutral_hue": "slate",
    }
    theme = gr.themes.Default(**palette)

    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "theme": theme,
    }
    build_app().launch(**launch_options)


if __name__ == "__main__":
    main()
|
components/__init__.py
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""UI components for the movie revenue predictor."""
|
| 2 |
+
|
| 3 |
+
from components.inputs import create_input_form
|
| 4 |
+
from components.prediction import predict_revenue_from_form
|
| 5 |
+
from components.examples import get_example_data
|
| 6 |
+
|
| 7 |
+
__all__ = ["create_input_form", "predict_revenue_from_form", "get_example_data"]
|
components/__pycache__/__init__.cpython-310.pyc
ADDED
|
Binary file (404 Bytes). View file
|
|
|
components/__pycache__/examples.cpython-310.pyc
ADDED
|
Binary file (1.78 kB). View file
|
|
|
components/__pycache__/inputs.cpython-310.pyc
ADDED
|
Binary file (3.48 kB). View file
|
|
|
components/__pycache__/prediction.cpython-310.pyc
ADDED
|
Binary file (2.17 kB). View file
|
|
|
components/examples.py
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Example data for the movie predictor."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
from typing import Any
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def get_example_data(feature_options: dict[str, list[str]]) -> list[list[Any]]:
    """Generate example movie data for quick testing.

    Args:
        feature_options: Available choices keyed by ``"genres"``,
            ``"production_companies"``, ``"Keywords"`` and ``"cast"``.
            Missing keys are treated as empty lists.

    Returns:
        Three example rows of 19 values each: 15 scalar fields followed by
        the genres, production companies, keywords and cast selections. Each
        selection is the first N available options, or fewer when fewer
        exist.
    """
    # Keep the full option lists here: truncating them up front would cap
    # every example below the sizes requested by the per-example slices.
    genres = feature_options.get("genres", [])
    companies = feature_options.get("production_companies", [])
    keywords = feature_options.get("Keywords", [])
    cast = feature_options.get("cast", [])

    return [
        [
            # Blockbuster Example (similar to Avengers-type movies)
            200_000_000,  # budget
            64.0,  # popularity
            143,  # runtime
            "2019-04-26",  # release_date
            "en",  # original_language
            True,  # belongs_to_collection
            True,  # homepage
            "The Final Showdown",  # title
            "Whatever it takes",  # tagline
            "After devastating events, heroes must assemble once more to undo chaos and restore order to the universe.",  # overview
            25,  # num_of_cast
            50,  # num_of_crew
            8,  # gender_cast_1 (female)
            15,  # gender_cast_2 (male)
            2,  # count_cast_other
            genres[:3],  # genres
            companies[:2],  # production_companies
            keywords[:5],  # keywords
            cast[:5],  # cast
        ],
        [
            # Mid-Budget Comedy
            40_000_000,  # budget
            8.2,  # popularity
            113,  # runtime
            "2015-08-06",  # release_date
            "en",  # original_language
            True,  # belongs_to_collection
            True,  # homepage
            "Royal Wedding",  # title
            "Royalty has its responsibilities",  # tagline
            "A young princess must navigate royal duties while finding true love before her coronation.",  # overview
            20,  # num_of_cast
            30,  # num_of_crew
            12,  # gender_cast_1 (female)
            8,  # gender_cast_2 (male)
            0,  # count_cast_other
            genres[:2],  # genres
            companies[:1],  # production_companies
            keywords[:3],  # keywords
            cast[:3],  # cast
        ],
        [
            # Low-Budget Thriller
            3_300_000,  # budget
            35.0,  # popularity
            105,  # runtime
            "2014-10-10",  # release_date
            "en",  # original_language
            False,  # belongs_to_collection
            True,  # homepage
            "Perfect Rhythm",  # title
            "Greatness comes at a price",  # tagline
            "A talented musician pushes beyond limits under the guidance of a demanding instructor.",  # overview
            15,  # num_of_cast
            25,  # num_of_crew
            3,  # gender_cast_1 (female)
            10,  # gender_cast_2 (male)
            2,  # count_cast_other
            genres[:1],  # genres
            companies[:2],  # production_companies
            keywords[:4],  # keywords
            cast[:2],  # cast
        ],
    ]
|
components/inputs.py
ADDED
|
@@ -0,0 +1,158 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Input form components for the movie predictor."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
import gradio as gr
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def create_input_form(feature_options: dict[str, list[str]]) -> tuple[dict[str, gr.components.Component], list[gr.components.Component]]:
    """Lay out every movie-attribute widget and hand back references to them.

    Args:
        feature_options: Dropdown choices keyed by ``"genres"``,
            ``"production_companies"``, ``"Keywords"`` and ``"cast"``; a
            missing key yields an empty choice list.

    Returns:
        A pair ``(by_name, ordered)``: the components keyed by field name,
        and the same components as a list in the fixed positional order the
        prediction handler expects.
    """
    fields: dict[str, gr.components.Component] = {}

    # First row: numeric metrics, release info and boolean flags side by side.
    with gr.Row():
        with gr.Column(scale=2):
            gr.Markdown("### 📊 Core Metrics")
            fields["budget"] = gr.Number(label="Budget ($)", value=50_000_000, info="Production budget in USD")
            fields["popularity"] = gr.Number(label="Popularity Score", value=10.0, info="Trending score (0-100)")
            fields["runtime"] = gr.Number(label="Runtime (minutes)", value=105, info="Movie duration")

        with gr.Column(scale=2):
            gr.Markdown("### 📅 Release Info")
            fields["release_date"] = gr.Textbox(label="Release Date", value="2015-06-12", info="Format: YYYY-MM-DD")
            fields["original_language"] = gr.Dropdown(
                label="Original Language",
                choices=["en", "zh", "ja", "other"],
                value="en",
            )

        with gr.Column(scale=1):
            gr.Markdown("### ✓ Flags")
            fields["belongs_to_collection"] = gr.Checkbox(label="Part of Collection", value=False)
            fields["homepage"] = gr.Checkbox(label="Has Homepage", value=False)

    # Second row: free-text descriptions.
    with gr.Row():
        with gr.Column():
            gr.Markdown("### 🎬 Movie Details")
            fields["title"] = gr.Textbox(label="Title", value="Sample Movie")
            fields["tagline"] = gr.Textbox(label="Tagline", value="A new story begins")
            fields["overview"] = gr.Textbox(
                label="Overview",
                value="A movie about discovery, conflict, and ambition.",
                lines=3,
            )

    # Third row: head counts for cast and crew.
    with gr.Row():
        with gr.Column():
            gr.Markdown("### 👥 Cast & Crew Statistics")
            gr.Markdown("*These are typically derived from cast/crew data. Estimate if unknown.*")
            with gr.Row():
                fields["num_of_cast"] = gr.Number(label="Total Cast Members", value=10, info="Number of actors")
                fields["num_of_crew"] = gr.Number(label="Total Crew Members", value=10, info="Number of crew")

            with gr.Row():
                fields["gender_cast_1"] = gr.Number(label="Female Cast (Gender=1)", value=4, info="Number of female actors")
                fields["gender_cast_2"] = gr.Number(label="Male Cast (Gender=2)", value=5, info="Number of male actors")
                fields["count_cast_other"] = gr.Number(label="Other/Unknown Gender", value=1, info="Other gender identities")

    # Collapsed by default: multi-select categorical features.
    with gr.Accordion("🎭 Optional: Genres, Companies & More", open=False):
        with gr.Row():
            fields["genres"] = gr.Dropdown(
                label="Genres",
                choices=feature_options.get("genres", []),
                multiselect=True,
                info="Select one or more genres",
            )
            fields["production_companies"] = gr.Dropdown(
                label="Production Companies",
                choices=feature_options.get("production_companies", []),
                multiselect=True,
                info="Select production companies",
            )

        with gr.Row():
            # The option key is capitalised ("Keywords") while the form field is lower-case.
            fields["keywords"] = gr.Dropdown(
                label="Keywords",
                choices=feature_options.get("Keywords", []),
                multiselect=True,
                info="Content keywords",
            )
            fields["cast"] = gr.Dropdown(
                label="Notable Cast",
                choices=feature_options.get("cast", []),
                multiselect=True,
                info="Famous actors",
            )

    # Positional order of the components as passed to the event handlers.
    field_order = (
        "budget",
        "popularity",
        "runtime",
        "release_date",
        "original_language",
        "belongs_to_collection",
        "homepage",
        "title",
        "tagline",
        "overview",
        "num_of_cast",
        "num_of_crew",
        "gender_cast_1",
        "gender_cast_2",
        "count_cast_other",
        "genres",
        "production_companies",
        "keywords",
        "cast",
    )
    return fields, [fields[key] for key in field_order]
|
components/prediction.py
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Prediction logic and output formatting."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
from typing import Any
|
| 6 |
+
|
| 7 |
+
from src.preprocess import predict_revenue
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
# Field names in the positional order in which form values reach
# predict_revenue_from_form; the i-th value is stored under the i-th name.
INPUT_ORDER = [
    # numeric metrics
    "budget", "popularity", "runtime",
    # release info and flags
    "release_date", "original_language", "belongs_to_collection", "homepage",
    # free text
    "title", "tagline", "overview",
    # cast and crew counts
    "num_of_cast", "num_of_crew", "gender_cast_1", "gender_cast_2", "count_cast_other",
    # multi-select categories
    "genres", "production_companies", "keywords", "cast",
]
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def format_currency(value: float) -> str:
    """Format a number as a compact dollar amount.

    Magnitudes of at least one billion are shown as ``$X.XXB``, at least one
    million as ``$X.XXM``, and anything smaller as a whole-dollar figure with
    thousands separators.

    Args:
        value: Amount in dollars; may be negative (e.g. an expected loss).

    Returns:
        The formatted amount, with a leading ``-`` before the ``$`` for
        negative values.
    """
    # Scale by magnitude so losses are abbreviated like gains; previously any
    # negative value fell through to the last branch and rendered as "$-50,000,000".
    sign = "-" if value < 0 else ""
    magnitude = abs(value)

    if magnitude >= 1_000_000_000:
        return f"{sign}${magnitude / 1_000_000_000:.2f}B"
    if magnitude >= 1_000_000:
        return f"{sign}${magnitude / 1_000_000:.2f}M"
    return f"{sign}${magnitude:,.0f}"
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def predict_revenue_from_form(model: Any, *values: Any) -> tuple[str, str]:
    """
    Turn positional form values into a prediction and two Markdown snippets.

    Args:
        model: Loaded estimator, or None when no model is available.
        *values: Form values in INPUT_ORDER order.

    Returns:
        Tuple of (prediction_text, profitability_text). On failure the first
        element carries the error message and the second is empty.
    """
    if model is None:
        return "❌ Model not available", ""

    # Pair each positional value with its field name.
    form = dict(zip(INPUT_ORDER, values))
    # Checkbox values become 0/1 flags.
    for flag in ("belongs_to_collection", "homepage"):
        form[flag] = int(bool(form.get(flag)))
    tagline = str(form.get("tagline") or "")
    form["has_tagline"] = 1 if tagline.strip() else 0

    try:
        revenue = predict_revenue(model, form)
        budget = float(form.get("budget") or 0.0)

        headline = f"## 💰 Predicted Revenue\n### {format_currency(revenue)}"

        # No positive budget means no ratio to compute.
        if not budget > 0:
            return headline, "ℹ️ Enter a budget to see profitability analysis"

        roi = (revenue - budget) / budget * 100
        multiple = revenue / budget

        # Start from the pessimistic label and upgrade it as thresholds are met.
        status = "🔴 **Loss Expected**"
        if roi > 0:
            status = "🟡 **Profitable**"
        if roi > 100:
            status = "🟢 **Highly Profitable**"

        # Leading and trailing empty entries reproduce the surrounding newlines.
        report_lines = [
            "",
            f"{status}",
            "",
            f"- **Budget:** {format_currency(budget)}",
            f"- **Revenue Multiple:** {multiple:.2f}x",
            f"- **ROI:** {roi:+.1f}%",
            f"- **Estimated Profit:** {format_currency(revenue - budget)}",
            "",
        ]
        return headline, "\n".join(report_lines)

    except Exception as exc:
        # UI boundary: show the failure in the output panel instead of raising.
        return f"❌ Prediction Error\n```\n{str(exc)}\n```", ""
|
model/final_model.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:674d26ddae3d3f076f531fea4cf8f0e8676075f4e082fb3dd3d8467864e064c6
|
| 3 |
+
size 21812058
|
requirements.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio==6.5.1
|
| 2 |
+
numpy==2.2.6
|
| 3 |
+
pandas==2.3.3
|
| 4 |
+
scikit-learn==1.6.1
|
src/__pycache__/preprocess.cpython-310.pyc
ADDED
|
Binary file (5.21 kB). View file
|
|
|
src/preprocess.py
ADDED
|
@@ -0,0 +1,157 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import math
|
| 4 |
+
import pickle
|
| 5 |
+
from datetime import datetime
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
from typing import Any
|
| 8 |
+
|
| 9 |
+
import numpy as np
|
| 10 |
+
import pandas as pd
|
| 11 |
+
|
| 12 |
+
# Integer code for each explicitly supported original-language value.
LANGUAGE_MAPPING = dict(en=1, zh=2, ja=3)

# One-hot column prefix -> form field holding the selected items. The form key
# is the prefix in lower case; only "Keywords" actually differs.
PREFIX_TO_FORM_KEY = {
    prefix: prefix.lower()
    for prefix in ("genres", "production_companies", "Keywords", "cast")
}
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def load_model(model_path: str | Path) -> Any:
|
| 23 |
+
with Path(model_path).open("rb") as file:
|
| 24 |
+
return pickle.load(file)
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def get_model_feature_names(model: Any) -> list[str]:
    """Return the model's ``feature_names_in_`` as a plain list.

    Raises:
        ValueError: If the model has no ``feature_names_in_`` attribute.
    """
    absent = object()  # sentinel, so an attribute holding None still counts as present
    names = getattr(model, "feature_names_in_", absent)
    if names is absent:
        raise ValueError("Model does not expose feature_names_in_.")
    return list(names)
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def count_words(text: str | None) -> int:
|
| 34 |
+
if text is None:
|
| 35 |
+
return 0
|
| 36 |
+
normalized = str(text).strip()
|
| 37 |
+
if not normalized:
|
| 38 |
+
return 0
|
| 39 |
+
return len(normalized.split())
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def runtime_category_code(runtime: float) -> int:
    """Bucket a runtime in minutes: 0 below 90, 1 below 120, otherwise 2."""
    upper_bounds = (90, 120)  # exclusive upper limit of buckets 0 and 1
    for code, limit in enumerate(upper_bounds):
        if runtime < limit:
            return code
    # Everything not below a limit lands in the last bucket.
    return len(upper_bounds)
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
def parse_release_date(value: str | None) -> datetime:
|
| 51 |
+
if not value:
|
| 52 |
+
return datetime(2010, 1, 1)
|
| 53 |
+
try:
|
| 54 |
+
return datetime.strptime(value, "%Y-%m-%d")
|
| 55 |
+
except ValueError as exc:
|
| 56 |
+
raise ValueError("release_date must be in YYYY-MM-DD format.") from exc
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
def parse_feature_options(feature_names: list[str]) -> dict[str, list[str]]:
    """Derive the selectable options for each one-hot feature group.

    For every prefix in ``PREFIX_TO_FORM_KEY``, collects the part after
    ``"<prefix>_"`` from each matching feature name, skipping the catch-all
    ``"<prefix>_other"`` column.

    Returns:
        A dict keyed by prefix whose values are the sorted, de-duplicated
        option names.
    """
    result: dict[str, list[str]] = {}
    for prefix in PREFIX_TO_FORM_KEY:
        marker = f"{prefix}_"
        catch_all = f"{prefix}_other"
        suffixes = {
            name[len(marker):]
            for name in feature_names
            if name.startswith(marker) and name != catch_all
        }
        result[prefix] = sorted(suffixes)
    return result
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
def _to_float(value: Any, default: float = 0.0) -> float:
|
| 72 |
+
try:
|
| 73 |
+
if value is None:
|
| 74 |
+
return default
|
| 75 |
+
return float(value)
|
| 76 |
+
except (TypeError, ValueError):
|
| 77 |
+
return default
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
def _to_int(value: Any, default: int = 0) -> int:
|
| 81 |
+
try:
|
| 82 |
+
if value is None:
|
| 83 |
+
return default
|
| 84 |
+
return int(value)
|
| 85 |
+
except (TypeError, ValueError):
|
| 86 |
+
return default
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
def build_feature_row(form_data: dict[str, Any], feature_names: list[str]) -> pd.DataFrame:
    """Build the single-row feature frame the model expects from form data.

    Every column in ``feature_names`` starts at 0.0 and is then filled from
    the form: scalar and derived features first, followed by the one-hot
    groups listed in ``PREFIX_TO_FORM_KEY``. Features not present in
    ``feature_names`` are ignored.

    Args:
        form_data: Form values keyed by field name. Missing or unconvertible
            numeric values fall back to their defaults.
        feature_names: Column names, in the order the model expects them.

    Returns:
        A one-row DataFrame with exactly ``feature_names`` as columns and
        infinities/NaN replaced by 0.

    Raises:
        ValueError: If ``release_date`` is present but not ``YYYY-MM-DD``.
    """
    row = {name: 0.0 for name in feature_names}

    # Negative amounts make no sense for these inputs; clamp them to zero.
    budget = max(_to_float(form_data.get("budget"), 0.0), 0.0)
    popularity = max(_to_float(form_data.get("popularity"), 0.0), 0.0)
    runtime = max(_to_float(form_data.get("runtime"), 0.0), 0.0)

    release_date = parse_release_date(form_data.get("release_date"))
    # Maps Dec-Feb -> 1, Mar-May -> 2, Jun-Aug -> 3, Sep-Nov -> 4.
    release_season = ((release_date.month % 12) + 3) // 3

    title_text = str(form_data.get("title") or "")
    tagline_text = str(form_data.get("tagline") or "")
    overview_text = str(form_data.get("overview") or "")

    values = {
        "belongs_to_collection": _to_int(form_data.get("belongs_to_collection"), 0),
        "homepage": _to_int(form_data.get("homepage"), 0),
        "has_tagline": _to_int(form_data.get("has_tagline"), 1 if tagline_text.strip() else 0),
        # Languages outside LANGUAGE_MAPPING (including "other") are coded 0.
        "original_language": LANGUAGE_MAPPING.get(str(form_data.get("original_language") or "").lower(), 0),
        "runtime": runtime,
        "num_of_cast": _to_float(form_data.get("num_of_cast"), 0.0),
        "num_of_crew": _to_float(form_data.get("num_of_crew"), 0.0),
        "gender_cast_1": _to_float(form_data.get("gender_cast_1"), 0.0),
        "gender_cast_2": _to_float(form_data.get("gender_cast_2"), 0.0),
        "count_cast_other": _to_float(form_data.get("count_cast_other"), 0.0),
        # Word counts come from the text unless the form supplies them directly.
        "title_word_count": _to_float(form_data.get("title_word_count"), count_words(title_text)),
        "tag_word_count": _to_float(form_data.get("tag_word_count"), count_words(tagline_text)),
        "overview_word_count": _to_float(form_data.get("overview_word_count"), count_words(overview_text)),
        "release_year": release_date.year,
        "release_month": release_date.month,
        "release_season": release_season,
        "runtime_category": runtime_category_code(runtime),
        "budget_log": math.log1p(budget),
        "popularity_log": math.log1p(popularity),
    }

    for key, value in values.items():
        if key in row:
            row[key] = value

    for prefix, form_key in PREFIX_TO_FORM_KEY.items():
        selected = form_data.get(form_key) or []
        if isinstance(selected, (tuple, set, frozenset)):
            # Other collections are treated like a list of selections.
            selected = list(selected)
        elif not isinstance(selected, list):
            # A single scalar selection.
            selected = [selected]

        known = 0
        for item in selected:
            col = f"{prefix}_{item}"
            if col in row:
                row[col] = 1.0
                known += 1

        num_col = f"num_of_{prefix}"
        # Derive the count from the selection only when the form did not
        # provide that feature itself. Otherwise an explicit value such as
        # "num_of_cast" would be overwritten by the number of selected
        # "cast" dropdown items.
        if num_col in row and form_data.get(num_col) is None:
            row[num_col] = float(len(selected))

        other_col = f"{prefix}_other"
        if other_col in row:
            # Flag the catch-all column when any selection has no own column.
            row[other_col] = 1.0 if len(selected) > known else 0.0

    df = pd.DataFrame([[row[name] for name in feature_names]], columns=feature_names)
    # NaN/inf inputs can propagate through the derived features above.
    return df.replace([np.inf, -np.inf], 0).fillna(0)
|
| 151 |
+
|
| 152 |
+
|
| 153 |
+
def predict_revenue(model: Any, form_data: dict[str, Any]) -> float:
    """Predict from form data and return the model's first output as a float.

    Raises:
        ValueError: If the model has no ``feature_names_in_`` or the release
            date is malformed.
    """
    columns = get_model_feature_names(model)
    predictions = model.predict(build_feature_row(form_data, columns))
    return float(predictions[0])
|