Kacemath committed on
Commit
b490ee7
·
1 Parent(s): 7f14f0b

Deploy gradio movie revenue app with model and preprocessing

Browse files
app.py ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Movie Box Office Revenue Predictor - Gradio Web Application."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+ from typing import Any
7
+
8
+ import gradio as gr
9
+
10
+ from components import create_input_form, get_example_data, predict_revenue_from_form
11
+ from src.preprocess import load_model, parse_feature_options
12
+
13
# Configuration
# Relative path: resolved against the process working directory when opened.
MODEL_PATH = Path("model/final_model.pkl")

# Global state
# MODEL stays None and MODEL_ERROR holds the failure text when loading fails;
# build_app() checks MODEL to decide whether to show a load-error message.
MODEL: Any | None = None
MODEL_ERROR: str | None = None
# Selectable values per feature group, derived from the model's feature names.
FEATURE_OPTIONS: dict[str, list[str]] = {}

# Initialize model
# Any failure (missing file, unpickling error, missing feature_names_in_) is
# captured as text instead of aborting the import, so the UI can still start.
try:
    MODEL = load_model(MODEL_PATH)
    FEATURE_OPTIONS = parse_feature_options(list(MODEL.feature_names_in_))
except Exception as exc:
    MODEL_ERROR = str(exc)
27
+
28
+
29
def build_app() -> gr.Blocks:
    """Build and configure the Gradio interface.

    Reads the module-level ``MODEL``, ``MODEL_ERROR`` and ``FEATURE_OPTIONS``
    to decide what to render: the input form and examples are populated from
    ``FEATURE_OPTIONS``, and a visible error banner is shown when the model
    could not be loaded.

    Returns:
        The assembled ``gr.Blocks`` application (not yet launched).
    """
    with gr.Blocks(
        title="🎬 Movie Revenue Predictor",
    ) as app:

        # Header
        with gr.Row():
            gr.Markdown(
                "\n"
                "# 🎬 Movie Box Office Revenue Predictor\n"
                "\n"
                "Predict movie revenue using machine learning trained on historical box office data.\n"
                "Enter movie details below and get instant revenue predictions with profitability analysis.\n",
                elem_classes=["header"],
            )

        # Model status
        # gr.Warning is a toast helper meant to be called from inside an event
        # handler; called while the layout is being built it never reaches the
        # page. Render the failure as a component so the user actually sees it.
        if MODEL is None:
            gr.Markdown(f"⚠️ Model loading error: {MODEL_ERROR}")

        # Main content
        with gr.Row():
            with gr.Column(scale=3):
                # Input form (only the ordered component list is needed here)
                _, input_list = create_input_form(FEATURE_OPTIONS)

                # Action buttons
                with gr.Row():
                    predict_btn = gr.Button(
                        "🎯 Predict Revenue",
                        variant="primary",
                        scale=2,
                        size="lg",
                    )
                    gr.ClearButton(
                        components=input_list,
                        value="🔄 Clear",
                        scale=1,
                        size="lg",
                    )

                # Examples
                gr.Markdown("### 📝 Quick Examples")
                gr.Examples(
                    examples=get_example_data(FEATURE_OPTIONS),
                    inputs=input_list,
                    label="Click an example to auto-fill the form",
                )

            with gr.Column(scale=2):
                gr.Markdown("### 📊 Prediction Results")

                # Output displays
                prediction_output = gr.Markdown(
                    "💡 Fill in the form and click **Predict Revenue** to see results.",
                    elem_classes=["output-box"],
                )

                profitability_output = gr.Markdown(
                    "",
                    elem_classes=["output-box"],
                )

        # Event handlers
        # The lambda looks MODEL up at click time and forwards the form values
        # positionally, in the same order as ``input_list``.
        predict_btn.click(
            fn=lambda *args: predict_revenue_from_form(MODEL, *args),
            inputs=input_list,
            outputs=[prediction_output, profitability_output],
        )

    return app
103
+
104
+
105
def main():
    """Build the interface and start the Gradio server.

    The server binds to all interfaces (0.0.0.0) on port 7860 without a
    public share link, using a Default theme with zinc/slate hues.
    """
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "theme": gr.themes.Default(
            primary_hue="zinc",
            secondary_hue="slate",
            neutral_hue="slate",
        ),
    }
    build_app().launch(**launch_options)
120
+
121
+
122
# Launch the server only when this file is run as a script, not on import.
if __name__ == "__main__":
    main()
components/__init__.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ """UI components for the movie revenue predictor."""
2
+
3
+ from components.inputs import create_input_form
4
+ from components.prediction import predict_revenue_from_form
5
+ from components.examples import get_example_data
6
+
7
+ __all__ = ["create_input_form", "predict_revenue_from_form", "get_example_data"]
components/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (404 Bytes). View file
 
components/__pycache__/examples.cpython-310.pyc ADDED
Binary file (1.78 kB). View file
 
components/__pycache__/inputs.cpython-310.pyc ADDED
Binary file (3.48 kB). View file
 
components/__pycache__/prediction.cpython-310.pyc ADDED
Binary file (2.17 kB). View file
 
components/examples.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Example data for the movie predictor."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+
7
+
8
def get_example_data(feature_options: dict[str, list[str]]) -> list[list[Any]]:
    """Return three sample form rows for quick testing.

    Each row holds 19 values: the scalar fields from budget through
    count_cast_other, followed by the genres, production_companies, keywords
    and cast selections. Selections are taken from the first entries of
    ``feature_options`` (at most 3 genres, 2 companies, 3 keywords, 2 cast),
    so they are empty lists when no options are available.
    """

    def _leading(option_key: str, limit: int) -> list[str]:
        # First ``limit`` available options for the group, or [] if absent.
        return feature_options.get(option_key, [])[:limit]

    genre_pool = _leading("genres", 3)
    company_pool = _leading("production_companies", 2)
    keyword_pool = _leading("Keywords", 3)
    cast_pool = _leading("cast", 2)

    # Blockbuster Example (similar to Avengers-type movies)
    blockbuster = [
        200_000_000,  # budget
        64.0,  # popularity
        143,  # runtime
        "2019-04-26",  # release_date
        "en",  # original_language
        True,  # belongs_to_collection
        True,  # homepage
        "The Final Showdown",  # title
        "Whatever it takes",  # tagline
        "After devastating events, heroes must assemble once more to undo chaos and restore order to the universe.",  # overview
        25,  # num_of_cast
        50,  # num_of_crew
        8,  # gender_cast_1 (female)
        15,  # gender_cast_2 (male)
        2,  # count_cast_other
        genre_pool[:3],  # genres
        company_pool[:2],  # production_companies
        keyword_pool[:5],  # keywords
        cast_pool[:5],  # cast
    ]

    # Mid-Budget Comedy
    comedy = [
        40_000_000,  # budget
        8.2,  # popularity
        113,  # runtime
        "2015-08-06",  # release_date
        "en",  # original_language
        True,  # belongs_to_collection
        True,  # homepage
        "Royal Wedding",  # title
        "Royalty has its responsibilities",  # tagline
        "A young princess must navigate royal duties while finding true love before her coronation.",  # overview
        20,  # num_of_cast
        30,  # num_of_crew
        12,  # gender_cast_1 (female)
        8,  # gender_cast_2 (male)
        0,  # count_cast_other
        genre_pool[:2],  # genres
        company_pool[:1],  # production_companies
        keyword_pool[:3],  # keywords
        cast_pool[:3],  # cast
    ]

    # Low-Budget Thriller
    thriller = [
        3_300_000,  # budget
        35.0,  # popularity
        105,  # runtime
        "2014-10-10",  # release_date
        "en",  # original_language
        False,  # belongs_to_collection
        True,  # homepage
        "Perfect Rhythm",  # title
        "Greatness comes at a price",  # tagline
        "A talented musician pushes beyond limits under the guidance of a demanding instructor.",  # overview
        15,  # num_of_cast
        25,  # num_of_crew
        3,  # gender_cast_1 (female)
        10,  # gender_cast_2 (male)
        2,  # count_cast_other
        genre_pool[:1],  # genres
        company_pool[:2],  # production_companies
        keyword_pool[:4],  # keywords
        cast_pool[:2],  # cast
    ]

    return [blockbuster, comedy, thriller]
components/inputs.py ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Input form components for the movie predictor."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import gradio as gr
6
+
7
+
8
def create_input_form(feature_options: dict[str, list[str]]) -> tuple[dict[str, gr.components.Component], list[gr.components.Component]]:
    """Lay out every movie-attribute input and return the created components.

    Must be called inside an active Gradio layout context. ``feature_options``
    supplies the choices for the multiselect dropdowns (keys ``genres``,
    ``production_companies``, ``Keywords`` and ``cast``); missing keys yield
    empty choice lists.

    Returns:
        A ``(by_name, ordered)`` pair: a dict of components keyed by field
        name, and the same components as a list in the fixed field order
        budget ... cast.
    """
    inputs = {}

    def add_number(key: str, label: str, value, info: str) -> None:
        # Numeric field with a default value and a helper caption.
        inputs[key] = gr.Number(label=label, value=value, info=info)

    def add_multiselect(key: str, label: str, option_key: str, info: str) -> None:
        # Multi-choice dropdown fed from the model-derived option lists.
        inputs[key] = gr.Dropdown(
            label=label,
            choices=feature_options.get(option_key, []),
            multiselect=True,
            info=info,
        )

    with gr.Row():
        with gr.Column(scale=2):
            gr.Markdown("### 📊 Core Metrics")
            add_number("budget", "Budget ($)", 50_000_000, "Production budget in USD")
            add_number("popularity", "Popularity Score", 10.0, "Trending score (0-100)")
            add_number("runtime", "Runtime (minutes)", 105, "Movie duration")

        with gr.Column(scale=2):
            gr.Markdown("### 📅 Release Info")
            inputs["release_date"] = gr.Textbox(
                label="Release Date",
                value="2015-06-12",
                info="Format: YYYY-MM-DD",
            )
            inputs["original_language"] = gr.Dropdown(
                label="Original Language",
                choices=["en", "zh", "ja", "other"],
                value="en",
            )

        with gr.Column(scale=1):
            gr.Markdown("### ✓ Flags")
            inputs["belongs_to_collection"] = gr.Checkbox(label="Part of Collection", value=False)
            inputs["homepage"] = gr.Checkbox(label="Has Homepage", value=False)

    with gr.Row():
        with gr.Column():
            gr.Markdown("### 🎬 Movie Details")
            inputs["title"] = gr.Textbox(label="Title", value="Sample Movie")
            inputs["tagline"] = gr.Textbox(label="Tagline", value="A new story begins")
            inputs["overview"] = gr.Textbox(
                label="Overview",
                value="A movie about discovery, conflict, and ambition.",
                lines=3,
            )

    with gr.Row():
        with gr.Column():
            gr.Markdown("### 👥 Cast & Crew Statistics")
            gr.Markdown("*These are typically derived from cast/crew data. Estimate if unknown.*")
            with gr.Row():
                add_number("num_of_cast", "Total Cast Members", 10, "Number of actors")
                add_number("num_of_crew", "Total Crew Members", 10, "Number of crew")

            with gr.Row():
                add_number("gender_cast_1", "Female Cast (Gender=1)", 4, "Number of female actors")
                add_number("gender_cast_2", "Male Cast (Gender=2)", 5, "Number of male actors")
                add_number("count_cast_other", "Other/Unknown Gender", 1, "Other gender identities")

    with gr.Accordion("🎭 Optional: Genres, Companies & More", open=False):
        with gr.Row():
            add_multiselect("genres", "Genres", "genres", "Select one or more genres")
            add_multiselect(
                "production_companies",
                "Production Companies",
                "production_companies",
                "Select production companies",
            )

        with gr.Row():
            add_multiselect("keywords", "Keywords", "Keywords", "Content keywords")
            add_multiselect("cast", "Notable Cast", "cast", "Famous actors")

    # Positional order in which the components are handed to callbacks.
    field_order = (
        "budget",
        "popularity",
        "runtime",
        "release_date",
        "original_language",
        "belongs_to_collection",
        "homepage",
        "title",
        "tagline",
        "overview",
        "num_of_cast",
        "num_of_crew",
        "gender_cast_1",
        "gender_cast_2",
        "count_cast_other",
        "genres",
        "production_companies",
        "keywords",
        "cast",
    )
    return inputs, [inputs[name] for name in field_order]
components/prediction.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Prediction logic and output formatting."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+
7
+ from src.preprocess import predict_revenue
8
+
9
+
10
# Field names assigned, in order, to the positional form values received by
# predict_revenue_from_form (paired via zip). This must stay aligned with the
# ordered component list returned by components.inputs.create_input_form.
INPUT_ORDER = [
    "budget",
    "popularity",
    "runtime",
    "release_date",
    "original_language",
    "belongs_to_collection",
    "homepage",
    "title",
    "tagline",
    "overview",
    "num_of_cast",
    "num_of_crew",
    "gender_cast_1",
    "gender_cast_2",
    "count_cast_other",
    "genres",
    "production_companies",
    "keywords",
    "cast",
]
31
+
32
+
33
def format_currency(value: float) -> str:
    """Format a number as a compact US-dollar string.

    Amounts of at least one billion are shown as ``$X.XXB``, at least one
    million as ``$X.XXM``, and anything smaller as a whole-dollar figure with
    thousands separators. Negative amounts (e.g. an expected loss) are
    abbreviated by magnitude in the same way, with the minus sign placed
    before the dollar sign: ``-$50.00M``.
    """
    sign = "-" if value < 0 else ""
    magnitude = abs(value)
    if magnitude >= 1_000_000_000:
        return f"{sign}${magnitude / 1_000_000_000:.2f}B"
    if magnitude >= 1_000_000:
        return f"{sign}${magnitude / 1_000_000:.2f}M"
    return f"{sign}${magnitude:,.0f}"
41
+
42
+
43
def predict_revenue_from_form(model: Any, *values: Any) -> tuple[str, str]:
    """Run a prediction for positional form values and format the outcome.

    ``values`` are paired with ``INPUT_ORDER`` to build the payload; the two
    checkbox flags are converted to 0/1 and ``has_tagline`` is derived from
    the tagline text before the payload is passed to ``predict_revenue``.

    Returns:
        Tuple of (prediction_text, profitability_text). When the model is
        missing or prediction raises, the first element carries the error
        message and the second is empty.
    """
    if model is None:
        return "❌ Model not available", ""

    # Build payload from form values
    payload = dict(zip(INPUT_ORDER, values))
    for flag in ("belongs_to_collection", "homepage"):
        payload[flag] = int(bool(payload.get(flag)))
    tagline = str(payload.get("tagline") or "")
    payload["has_tagline"] = int(bool(tagline.strip()))

    try:
        prediction = predict_revenue(model, payload)
        budget = float(payload.get("budget") or 0.0)

        # Format prediction
        headline = f"## 💰 Predicted Revenue\n### {format_currency(prediction)}"

        # No positive budget: ROI is undefined, so only prompt for one.
        if not budget > 0:
            return headline, "ℹ️ Enter a budget to see profitability analysis"

        # Calculate ROI
        roi = (prediction - budget) / budget * 100
        multiple = prediction / budget

        if roi > 100:
            status = "🟢 **Highly Profitable**"
        elif roi > 0:
            status = "🟡 **Profitable**"
        else:
            status = "🔴 **Loss Expected**"

        breakdown = (
            f"\n{status}\n"
            "\n"
            f"- **Budget:** {format_currency(budget)}\n"
            f"- **Revenue Multiple:** {multiple:.2f}x\n"
            f"- **ROI:** {roi:+.1f}%\n"
            f"- **Estimated Profit:** {format_currency(prediction - budget)}\n"
        )
        return headline, breakdown

    except Exception as exc:
        # Surface any preprocessing/model failure to the user as text.
        return f"❌ Prediction Error\n```\n{str(exc)}\n```", ""
model/final_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:674d26ddae3d3f076f531fea4cf8f0e8676075f4e082fb3dd3d8467864e064c6
3
+ size 21812058
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ gradio==6.5.1
2
+ numpy==2.2.6
3
+ pandas==2.3.3
4
+ scikit-learn==1.6.1
src/__pycache__/preprocess.cpython-310.pyc ADDED
Binary file (5.21 kB). View file
 
src/preprocess.py ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import math
4
+ import pickle
5
+ from datetime import datetime
6
+ from pathlib import Path
7
+ from typing import Any
8
+
9
+ import numpy as np
10
+ import pandas as pd
11
+
12
# Numeric codes for the "original_language" feature. build_feature_row maps
# any language not listed here to 0.
LANGUAGE_MAPPING = {"en": 1, "zh": 2, "ja": 3}

# Maps a feature-name prefix (as it appears in the model's feature names,
# e.g. "Keywords_<value>") to the form-data key holding the user's selections
# for that group.
PREFIX_TO_FORM_KEY = {
    "genres": "genres",
    "production_companies": "production_companies",
    "Keywords": "keywords",
    "cast": "cast",
}
20
+
21
+
22
def load_model(model_path: str | Path) -> Any:
    """Unpickle and return the object stored at ``model_path``.

    NOTE: unpickling can execute arbitrary code, so only point this at
    trusted files.
    """
    source = Path(model_path)
    with source.open("rb") as handle:
        model = pickle.load(handle)
    return model
25
+
26
+
27
def get_model_feature_names(model: Any) -> list[str]:
    """Return the model's ``feature_names_in_`` as a plain list.

    Raises:
        ValueError: If the model has no ``feature_names_in_`` attribute.
    """
    absent = object()
    names = getattr(model, "feature_names_in_", absent)
    if names is absent:
        raise ValueError("Model does not expose feature_names_in_.")
    return list(names)
31
+
32
+
33
def count_words(text: str | None) -> int:
    """Count whitespace-separated words in ``text``.

    ``None``, empty and whitespace-only input all count as zero words.
    """
    if text is None:
        return 0
    # split() with no separator ignores leading/trailing whitespace and
    # returns [] for blank input.
    return len(str(text).split())
40
+
41
+
42
def runtime_category_code(runtime: float) -> int:
    """Bucket a runtime in minutes: 0 below 90, 1 below 120, otherwise 2."""
    return 0 if runtime < 90 else (1 if runtime < 120 else 2)
48
+
49
+
50
def parse_release_date(value: str | None) -> datetime:
    """Parse a ``YYYY-MM-DD`` release date.

    Surrounding whitespace is ignored, since the value comes from a
    free-text field. Missing, empty or whitespace-only input falls back to
    2010-01-01.

    Raises:
        ValueError: If a non-blank value is not in ``YYYY-MM-DD`` format.
    """
    text = str(value).strip() if value else ""
    if not text:
        return datetime(2010, 1, 1)
    try:
        return datetime.strptime(text, "%Y-%m-%d")
    except ValueError as exc:
        raise ValueError("release_date must be in YYYY-MM-DD format.") from exc
57
+
58
+
59
def parse_feature_options(feature_names: list[str]) -> dict[str, list[str]]:
    """Extract the selectable values for each prefixed feature group.

    For every prefix in ``PREFIX_TO_FORM_KEY``, collects the part after
    ``"<prefix>_"`` from each matching feature name, skipping the catch-all
    ``"<prefix>_other"`` column.

    Returns:
        Dict keyed by prefix, each value a sorted list of unique option names.
    """
    buckets: dict[str, set[str]] = {prefix: set() for prefix in PREFIX_TO_FORM_KEY}

    for prefix, found in buckets.items():
        head = f"{prefix}_"
        catch_all = f"{prefix}_other"
        found.update(
            name[len(head) :]
            for name in feature_names
            if name.startswith(head) and name != catch_all
        )

    return {prefix: sorted(found) for prefix, found in buckets.items()}
69
+
70
+
71
+ def _to_float(value: Any, default: float = 0.0) -> float:
72
+ try:
73
+ if value is None:
74
+ return default
75
+ return float(value)
76
+ except (TypeError, ValueError):
77
+ return default
78
+
79
+
80
+ def _to_int(value: Any, default: int = 0) -> int:
81
+ try:
82
+ if value is None:
83
+ return default
84
+ return int(value)
85
+ except (TypeError, ValueError):
86
+ return default
87
+
88
+
89
def build_feature_row(form_data: dict[str, Any], feature_names: list[str]) -> pd.DataFrame:
    """Build a single-row feature frame in the model's column order.

    Every column in ``feature_names`` starts at 0.0. Scalar and derived
    features (flags, language code, counts, word counts, release date parts,
    runtime bucket, log-transformed budget and popularity) are filled in when
    the model has a column of that name. For each prefix in
    ``PREFIX_TO_FORM_KEY`` the selected values set their ``<prefix>_<value>``
    indicator columns, ``<prefix>_other`` flags any selection without a
    matching column, and ``num_of_<prefix>`` receives the selection count
    unless the form supplied that field explicitly.

    Args:
        form_data: Raw form values keyed by field name.
        feature_names: Column names expected by the model, in order.

    Returns:
        A one-row DataFrame with infinities and NaNs replaced by 0.

    Raises:
        ValueError: If ``release_date`` is present but not ``YYYY-MM-DD``.
    """
    row = dict.fromkeys(feature_names, 0.0)

    # Negative inputs are clamped to zero so log1p below stays defined.
    budget = max(_to_float(form_data.get("budget"), 0.0), 0.0)
    popularity = max(_to_float(form_data.get("popularity"), 0.0), 0.0)
    runtime = max(_to_float(form_data.get("runtime"), 0.0), 0.0)

    release_date = parse_release_date(form_data.get("release_date"))
    # Maps months to 1..4 with Dec-Feb -> 1, Mar-May -> 2, Jun-Aug -> 3, Sep-Nov -> 4.
    release_season = ((release_date.month % 12) + 3) // 3

    title_text = str(form_data.get("title") or "")
    tagline_text = str(form_data.get("tagline") or "")
    overview_text = str(form_data.get("overview") or "")

    values = {
        "belongs_to_collection": _to_int(form_data.get("belongs_to_collection"), 0),
        "homepage": _to_int(form_data.get("homepage"), 0),
        "has_tagline": _to_int(form_data.get("has_tagline"), 1 if tagline_text.strip() else 0),
        "original_language": LANGUAGE_MAPPING.get(str(form_data.get("original_language") or "").lower(), 0),
        "runtime": runtime,
        "num_of_cast": _to_float(form_data.get("num_of_cast"), 0.0),
        "num_of_crew": _to_float(form_data.get("num_of_crew"), 0.0),
        "gender_cast_1": _to_float(form_data.get("gender_cast_1"), 0.0),
        "gender_cast_2": _to_float(form_data.get("gender_cast_2"), 0.0),
        "count_cast_other": _to_float(form_data.get("count_cast_other"), 0.0),
        "title_word_count": _to_float(form_data.get("title_word_count"), count_words(title_text)),
        "tag_word_count": _to_float(form_data.get("tag_word_count"), count_words(tagline_text)),
        "overview_word_count": _to_float(form_data.get("overview_word_count"), count_words(overview_text)),
        "release_year": release_date.year,
        "release_month": release_date.month,
        "release_season": release_season,
        "runtime_category": runtime_category_code(runtime),
        "budget_log": math.log1p(budget),
        "popularity_log": math.log1p(popularity),
    }

    # Only populate features the model actually knows about.
    for key, value in values.items():
        if key in row:
            row[key] = value

    for prefix, form_key in PREFIX_TO_FORM_KEY.items():
        selected = form_data.get(form_key) or []
        if isinstance(selected, (tuple, set, frozenset)):
            # Treat other collections as multiple selections, not one item.
            selected = list(selected)
        elif not isinstance(selected, list):
            selected = [selected]

        known = 0
        for item in selected:
            col = f"{prefix}_{item}"
            if col in row:
                row[col] = 1.0
                known += 1

        # Derive the count from the selection only when the form did not
        # provide it. Otherwise an explicit field such as "num_of_cast"
        # (total cast members) would be overwritten by the number of
        # notable cast names picked in the dropdown.
        num_col = f"num_of_{prefix}"
        if num_col in row and form_data.get(num_col) is None:
            row[num_col] = float(len(selected))

        # Flag selections that have no dedicated indicator column.
        other_col = f"{prefix}_other"
        if other_col in row:
            row[other_col] = 1.0 if len(selected) > known else 0.0

    df = pd.DataFrame([[row[name] for name in feature_names]], columns=feature_names)
    return df.replace([np.inf, -np.inf], 0).fillna(0)
151
+
152
+
153
def predict_revenue(model: Any, form_data: dict[str, Any]) -> float:
    """Predict from raw form data and return the model's first output as a float.

    The feature row is built against the model's own ``feature_names_in_``,
    so a model without that attribute raises ``ValueError``.
    """
    columns = get_model_feature_names(model)
    features = build_feature_row(form_data, columns)
    return float(model.predict(features)[0])