kimddol commited on
Commit
6cb1584
·
verified ·
1 Parent(s): 5df8f46

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +276 -0
  2. requirements.txt +6 -0
app.py ADDED
@@ -0,0 +1,276 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ # ============================================
3
+ # Netflix (KR) Recommender + Review Analyzer — Live TMDb with Posters
4
+ # - Uses TMDb API (env var: TMDB_API_KEY), with optional UI override
5
+ # - Gradio app suitable for Hugging Face Spaces (CPU-friendly)
6
+ # ============================================
7
+
8
+ import os
9
+ import time
10
+ import requests
11
+ import traceback
12
+ from typing import Dict, Any, List, Tuple
13
+
14
+ import numpy as np
15
+ import gradio as gr
16
+
17
+ # Optional NLP models
18
+ from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
19
+ from sentence_transformers import SentenceTransformer
20
+
21
+ # -----------------------------
22
+ # Config
23
+ # -----------------------------
24
+ TMDB_BASE = "https://api.themoviedb.org/3"
25
+ TMDB_IMG_BASE = "https://image.tmdb.org/t/p/w500" # w500 is a good balance for gallery
26
+ DEFAULT_REGION = "KR"
27
+
28
+ # Load lightweight NLP models (CPU)
29
def _load_models():
    """Build the three CPU models used by the app.

    Returns a ``(sentiment, summarizer, embedder)`` tuple. ``embedder`` is
    ``None`` when the sentence-embedding model cannot be constructed; the
    other two models are mandatory and any failure loading them propagates.
    """
    # Multilingual sentiment classifier (labels are 1-5 stars).
    sentiment = pipeline(
        "sentiment-analysis",
        model="nlptown/bert-base-multilingual-uncased-sentiment",
        device=-1,
    )

    # Small seq2seq model used for the one-line summaries.
    t5_name = "google/flan-t5-small"
    tokenizer = AutoTokenizer.from_pretrained(t5_name)
    seq2seq = AutoModelForSeq2SeqLM.from_pretrained(t5_name)
    summarizer = pipeline(
        "text2text-generation", model=seq2seq, tokenizer=tokenizer, device=-1
    )

    # Multilingual sentence embedder for semantic ranking (optional).
    embedder = None
    try:
        embedder = SentenceTransformer(
            "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
        )
    except Exception:
        embedder = None

    return sentiment, summarizer, embedder
42
+
43
+ _sent, _summer, _emb = _load_models()
44
+
45
+
46
+ # -----------------------------
47
+ # TMDb helpers
48
+ # -----------------------------
49
def tmdb_get(api_key: str, path: str, params: Dict[str, Any]) -> Dict[str, Any]:
    """GET a TMDb endpoint with a small retry/backoff loop.

    Args:
        api_key: TMDb API key, sent as the ``api_key`` query parameter.
        path: Endpoint path appended to ``TMDB_BASE`` (e.g. ``"/discover/movie"``).
        params: Extra query parameters.

    Returns:
        The decoded JSON body of the first HTTP 200 response.

    Raises:
        RuntimeError: If no attempt produced an HTTP 200 response. The message
            carries the last status/body snippet or transport error, with the
            API key redacted.
    """
    url = f"{TMDB_BASE}{path}"
    query = {"api_key": api_key, **params}
    max_attempts = 3
    last_err = None
    for attempt in range(max_attempts):
        try:
            resp = requests.get(url, params=query, timeout=25)
            if resp.status_code == 200:
                return resp.json()
            last_err = f"{resp.status_code} {resp.text[:200]}"
            # Client errors (bad key, bad parameter, not found) will not change
            # on retry; only 429 (rate limit) is worth waiting for.
            if 400 <= resp.status_code < 500 and resp.status_code != 429:
                break
        except (requests.RequestException, ValueError) as e:
            # ValueError covers a 200 response whose body is not valid JSON.
            last_err = str(e)
        # Back off between attempts, but do not stall after the final one.
        if attempt < max_attempts - 1:
            time.sleep(0.7 * (attempt + 1))
    # Transport errors embed the full request URL, which includes the key.
    detail = str(last_err)
    if api_key:
        detail = detail.replace(api_key, "***")
    raise RuntimeError(f"TMDb request failed: {detail}")
64
+
65
def get_provider_id(api_key: str, region: str, provider_name="Netflix") -> int:
    """Look up a watch provider's TMDb id for ``region``.

    Queries the movie watch-provider list and returns the id of the first
    entry whose name equals ``provider_name`` case-insensitively. Falls back
    to ``8`` when no entry matches.
    """
    listing = tmdb_get(api_key, "/watch/providers/movie", {"watch_region": region})
    matching_ids = (
        int(entry["provider_id"])
        for entry in listing.get("results", [])
        if str(entry.get("provider_name", "")).lower() == provider_name.lower()
    )
    # 8 is the fallback id used when the provider is not listed.
    return next(matching_ids, 8)
72
+
73
def discover_quick(api_key: str, region: str, nfx_id: int, ctype="movie",
                   sort_by="popularity.desc", page_limit=2) -> List[Dict[str, Any]]:
    """Fetch titles from TMDb Discover, filtered to one watch provider.

    Args:
        api_key: TMDb API key.
        region: Watch region code passed as ``watch_region``.
        nfx_id: Provider id passed as ``with_watch_providers``.
        ctype: ``"movie"`` or ``"tv"``; selects the ``/discover/{ctype}`` endpoint.
        sort_by: TMDb ``sort_by`` value. A ``release_date.*`` key is translated
            to ``first_air_date.*`` for TV, which has no release-date sort.
        page_limit: Maximum number of result pages to request.

    Returns:
        Result dicts from TMDb, each extended with a ``"type"`` key set to
        ``ctype``.
    """
    if ctype == "tv" and sort_by.startswith("release_date."):
        sort_by = "first_air_date." + sort_by.split(".", 1)[1]

    params = {
        "watch_region": region,
        "with_watch_providers": nfx_id,
        "sort_by": sort_by,
        "include_adult": False,
        "language": "ko-KR",
    }
    rows = []
    for page in range(1, page_limit + 1):
        data = tmdb_get(api_key, f"/discover/{ctype}", {**params, "page": page})
        rows.extend({"type": ctype, **r} for r in data.get("results", []))
        # Stop early instead of requesting pages that do not exist.
        total_pages = data.get("total_pages")
        if isinstance(total_pages, int) and page >= total_pages:
            break
    return rows
90
+
91
def has_netflix_offer(api_key: str, content_type: str, tmdb_id: int, region: str, nfx_id: int) -> bool:
    """Tell whether a title is offered by provider ``nfx_id`` in ``region``.

    Looks at the ``flatrate``, ``ads`` and ``free`` offer lists of the title's
    watch-provider data for the region and returns True if any entry carries
    the given provider id.
    """
    payload = tmdb_get(api_key, f"/{content_type}/{tmdb_id}/watch/providers", {})
    region_info = payload.get("results", {}).get(region, {})
    offers = (
        region_info.get("flatrate", [])
        + region_info.get("ads", [])
        + region_info.get("free", [])
    )
    for offer in offers:
        if int(offer.get("provider_id", -1)) == nfx_id:
            return True
    return False
98
+
99
def search_and_filter(api_key: str, query: str, region: str, nfx_id: int,
                      content_types=("movie", "tv"), max_pages_each=2,
                      max_total=60) -> List[Dict[str, Any]]:
    """Search TMDb by text and keep only titles offered by the provider.

    For each content type, up to ``max_pages_each`` search pages are fetched
    and every hit is checked with ``has_netflix_offer``.

    Args:
        api_key: TMDb API key.
        query: Free-text search query.
        region: Watch region code used for the provider check.
        nfx_id: Provider id a title must be offered by.
        content_types: Endpoints to search (``"movie"`` and/or ``"tv"``).
        max_pages_each: Maximum search pages per content type.
        max_total: Hard cap on the number of returned items.

    Returns:
        At most ``max_total`` result dicts, each extended with a ``"type"`` key.
    """
    out: List[Dict[str, Any]] = []
    if max_total <= 0:
        return out

    for ctype in content_types:
        for page in range(1, max_pages_each + 1):
            data = tmdb_get(api_key, f"/search/{ctype}", {
                "query": query, "page": page, "include_adult": False, "language": "ko-KR"
            })
            for item in data.get("results", []):
                tmdb_id = item.get("id")
                if tmdb_id is None:
                    # Cannot look up providers without an id; skip the hit.
                    continue
                try:
                    offered = has_netflix_offer(api_key, ctype, tmdb_id, region, nfx_id)
                except Exception:
                    # Deliberate best-effort: one failed provider lookup must
                    # not abort the whole search.
                    continue
                if offered:
                    out.append({"type": ctype, **item})
                    # Enforce the cap per item so the result never overshoots
                    # and no further provider lookups are made.
                    if len(out) >= max_total:
                        return out
            # Stop early instead of requesting pages that do not exist.
            total_pages = data.get("total_pages")
            if isinstance(total_pages, int) and page >= total_pages:
                break
    return out
123
+
124
+
125
+ # -----------------------------
126
+ # Ranking & formatting
127
+ # -----------------------------
128
def _embed_texts(texts: List[str]) -> np.ndarray:
    """Encode ``texts`` with the module-level embedder.

    Returns the embedder's normalized numpy output, or a float32 zero matrix
    of shape ``(len(texts), 384)`` when no embedder is loaded or ``texts`` is
    empty.
    """
    if _emb is not None and texts:
        return _emb.encode(
            texts,
            normalize_embeddings=True,
            convert_to_numpy=True,
            show_progress_bar=False,
        )
    return np.zeros((len(texts), 384), dtype=np.float32)
133
+
134
def rank_by_query(items: List[Dict[str, Any]], query: str, topk: int = 10) -> List[Dict[str, Any]]:
    """Return the ``topk`` items most similar to ``query``.

    Each item is represented by ``"<title>. <overview>"`` and compared with the
    query via the dot product of normalized embeddings. When the query is
    empty/blank or no embedder is loaded, the first ``topk`` items are returned
    in their incoming order.

    Args:
        items: TMDb result dicts (``name``/``title`` and ``overview`` are read).
        query: Free-text preference; may be empty or None.
        topk: Maximum number of items to return. Numeric values such as
            ``9.0`` are coerced to int; values <= 0 yield an empty list;
            ``None`` means "no limit".

    Returns:
        A new list holding at most ``topk`` of the given items.
    """
    limit = None if topk is None else max(int(topk), 0)
    if not items or limit == 0:
        return []
    if not query or not query.strip() or _emb is None:
        return items[:limit]

    texts = [
        f"{it.get('name') or it.get('title') or ''}. {it.get('overview') or ''}"
        for it in items
    ]
    q = _emb.encode([query], normalize_embeddings=True, convert_to_numpy=True,
                    show_progress_bar=False)[0]
    X = _emb.encode(texts, normalize_embeddings=True, convert_to_numpy=True,
                    show_progress_bar=False)
    sims = X @ q
    # Stable sort keeps the incoming order among equally similar items.
    order = np.argsort(-sims, kind="stable")[:limit]
    return [items[int(i)] for i in order]
149
+
150
+ def build_gallery(items: List[Dict[str, Any]]) -> Tuple[list, list]:
151
+ """
152
+ Return (gallery_items, table_rows). Gallery expects list of [image, caption]
153
+ """
154
+ gallery = []
155
+ rows = []
156
+ for it in items:
157
+ title = it.get("name") or it.get("title") or ""
158
+ overview = it.get("overview") or ""
159
+ date = it.get("first_air_date") or it.get("release_date") or ""
160
+ vote = it.get("vote_average")
161
+ ctype = "๋“œ๋ผ๋งˆ" if it.get("type") == "tv" else "์˜ํ™”"
162
+ poster = it.get("poster_path")
163
+ img = f"{TMDB_IMG_BASE}{poster}" if poster else None
164
+ cap = f"{title} ({ctype})\nํ‰์ : {vote} | ๊ณต๊ฐœ: {date}\n{overview[:120]}{'...' if len(overview)>120 else ''}"
165
+ gallery.append([img, cap])
166
+ rows.append({"์ œ๋ชฉ": title, "์œ ํ˜•": ctype, "๊ณต๊ฐœ์ผ": date, "TMDbํ‰์ ": vote, "๊ฐœ์š”": overview})
167
+ return gallery, rows
168
+
169
+
170
+ # -----------------------------
171
+ # Business logic (callbacks)
172
+ # -----------------------------
173
# Korean sentiment label for each predicted star rating (1 = very negative,
# 5 = very positive).
STAR_MAP = {1: "매우 부정", 2: "부정", 3: "중립", 4: "긍정", 5: "매우 긍정"}
174
+
175
+ def do_recommend(api_key_ui: str, query: str, region: str, mode: str, topk: int,
176
+ sort_by: str, include_movie: bool, include_tv: bool):
177
+ try:
178
+ api_key = (api_key_ui or "").strip() or os.environ.get("TMDB_API_KEY", "").strip()
179
+ if not api_key:
180
+ return "TMDb API Key๋ฅผ ์ž…๋ ฅํ•˜๊ฑฐ๋‚˜ ํ™˜๊ฒฝ๋ณ€์ˆ˜ TMDB_API_KEY๋ฅผ ์„ค์ •ํ•˜์„ธ์š”.", None, None
181
+ nfx_id = get_provider_id(api_key, region, "Netflix")
182
+
183
+ types = []
184
+ if include_movie: types.append("movie")
185
+ if include_tv: types.append("tv")
186
+ if not types:
187
+ types = ["movie", "tv"]
188
+
189
+ # Fetch
190
+ if mode == "๋น ๋ฅธ ์ถ”์ฒœ(Discover)":
191
+ items = []
192
+ for t in types:
193
+ items.extend(discover_quick(api_key, region, nfx_id, ctype=t, sort_by=sort_by, page_limit=2))
194
+ else:
195
+ items = search_and_filter(api_key, query or "Netflix", region, nfx_id,
196
+ content_types=tuple(types), max_pages_each=2, max_total=80)
197
+
198
+ if not items:
199
+ return f"์กฐ๊ฑด์— ๋งž๋Š” ๋„ทํ”Œ๋ฆญ์Šค({region}) ์ž‘ํ’ˆ์„ ์ฐพ์ง€ ๋ชปํ–ˆ์Šต๋‹ˆ๋‹ค.", None, None
200
+
201
+ ranked = rank_by_query(items, query, topk=topk)
202
+ gallery, rows = build_gallery(ranked)
203
+ # One-line pitch for top1
204
+ t = ranked[0]
205
+ top_title = (t.get("name") or t.get("title") or "")
206
+ pitch_prompt = (
207
+ "Summarize in Korean (1-2 sentences):\n"
208
+ f"์‚ฌ์šฉ์ž ์ทจํ–ฅ/ํ‚ค์›Œ๋“œ: {query}\n"
209
+ f"์ž‘ํ’ˆ: {top_title} / ๊ฐœ์š”: {t.get('overview','')}"
210
+ )
211
+ pitch = _summer(pitch_prompt, max_new_tokens=80, do_sample=False)[0]["generated_text"]
212
+ md = f"### โœ… ์ถ”์ฒœ ๊ฒฐ๊ณผ (Region={region}, Provider=Netflix)\n- Top 1: **{top_title}** โ€” {pitch}"
213
+ return md, gallery, rows
214
+ except Exception as e:
215
+ return f"[์˜ค๋ฅ˜] {e}\n{traceback.format_exc()}", None, None
216
+
217
def analyze_review(title: str, review: str):
    """Gradio callback: predict a star rating and a one-line summary.

    Args:
        title: Optional title the review is about (only used in the prompt).
        review: Review text to analyze.

    Returns:
        ``(headline, summary)`` strings. For blank input the headline asks for
        a review and the summary is empty; on error the headline carries the
        error message and the summary is empty.
    """
    try:
        if not review or not review.strip():
            return "감상평을 입력해 주세요.", ""

        # Truncate to the classifier's maximum input length; without this a
        # long review makes the model call fail.
        res = _sent(review, truncation=True)[0]
        # The label's first character is read as the star count.
        stars = int(res["label"][0])
        head = (
            f"예측 별점: {stars} ({STAR_MAP.get(stars, '중립')}) / "
            f"확신도: {float(res['score']):.3f}"
        )
        summ = _summer(
            f"Summarize in Korean (1 sentence):\n제목: {title}\n감상평: {review}",
            max_new_tokens=60, do_sample=False
        )[0]["generated_text"]
        return head, f"한줄평: {summ}"
    except Exception as e:
        # Keep the stack trace in the server log rather than the public UI.
        traceback.print_exc()
        return f"[오류] {e}", ""
231
+
232
+
233
+ # -----------------------------
234
+ # Gradio UI
235
+ # -----------------------------
236
+ with gr.Blocks() as demo:
237
+ gr.Markdown("## ๐Ÿฟ ์‹ค์‹œ๊ฐ„ ๋„ทํ”Œ๋ฆญ์Šค(KR) ์ถ”์ฒœ & ๊ฐ์ƒํ‰ โ€” TMDb API + ํฌ์Šคํ„ฐ ์ด๋ฏธ์ง€")
238
+
239
+ with gr.Accordion("TMDb API ์„ค์ •", open=True):
240
+ api_key = gr.Textbox(label="TMDb API Key (UI ์ž…๋ ฅ์€ ์„ ํƒ, ๊ธฐ๋ณธ์€ ํ™˜๊ฒฝ๋ณ€์ˆ˜ TMDB_API_KEY ์‚ฌ์šฉ)", type="password")
241
+ region = gr.Dropdown(choices=["KR","US","JP","GB","DE","FR","ES"], value=DEFAULT_REGION, label="์ง€์—ญ(Watch Region)")
242
+
243
+ with gr.Tab("์ถ”์ฒœ"):
244
+ query = gr.Textbox(label="ํ‚ค์›Œ๋“œ/๊ธฐ๋ถ„(์„ ํƒ)", placeholder="์˜ˆ) ๋”ฐ๋œปํ•œ ์„ฑ์žฅ ๋“œ๋ผ๋งˆ, ๋ฌด์„œ์šด ํ•œ๊ตญ ์Šค๋ฆด๋Ÿฌ", lines=2)
245
+ with gr.Row():
246
+ mode = gr.Radio(choices=["๋น ๋ฅธ ์ถ”์ฒœ(Discover)", "ํ‚ค์›Œ๋“œ ๊ฒ€์ƒ‰(์ •ํ™•)"], value="๋น ๋ฅธ ์ถ”์ฒœ(Discover)", label="๊ฒ€์ƒ‰ ๋ชจ๋“œ")
247
+ sort_by = gr.Dropdown(choices=["popularity.desc","vote_average.desc","release_date.desc"], value="popularity.desc", label="์ •๋ ฌ(Discover์šฉ)")
248
+ topk = gr.Slider(3, 20, value=9, step=1, label="ํ‘œ์‹œ ๊ฐœ์ˆ˜")
249
+ with gr.Row():
250
+ include_movie = gr.Checkbox(value=True, label="์˜ํ™” ํฌํ•จ")
251
+ include_tv = gr.Checkbox(value=True, label="๋“œ๋ผ๋งˆ ํฌํ•จ")
252
+ btn = gr.Button("์ถ”์ฒœ ๋ฐ›๊ธฐ")
253
+
254
+ out_md = gr.Markdown()
255
+ out_gallery = gr.Gallery(label="ํฌ์Šคํ„ฐ ๊ฐค๋Ÿฌ๋ฆฌ", columns=3, height="auto", allow_preview=True)
256
+ out_table = gr.Dataframe(interactive=False, wrap=True)
257
+
258
+ btn.click(
259
+ do_recommend,
260
+ inputs=[api_key, query, region, mode, topk, sort_by, include_movie, include_tv],
261
+ outputs=[out_md, out_gallery, out_table]
262
+ )
263
+
264
+ with gr.Tab("๊ฐ์ƒํ‰ ๋ถ„์„"):
265
+ title = gr.Textbox(label="์ œ๋ชฉ(์„ ํƒ)", placeholder="์ถ”์ฒœ ํƒญ์—์„œ ๋ณต์‚ฌํ•ด ๋ถ™์—ฌ๋„ฃ๊ธฐ")
266
+ review = gr.Textbox(label="๊ฐ์ƒํ‰", lines=5, placeholder="์˜ˆ) ์ดˆ๋ฐ˜์€ ๋Š˜์–ด์ง€์ง€๋งŒ, ๋ฐฐ์šฐ ์—ฐ๊ธฐ๊ฐ€ ์••๊ถŒ์ด์—์š”.")
267
+ b2 = gr.Button("๋ถ„์„")
268
+ head = gr.Markdown()
269
+ summ = gr.Markdown()
270
+ b2.click(analyze_review, inputs=[title, review], outputs=[head, summ])
271
+
272
+ # Expose demo for Spaces
273
+ app = demo
274
+
275
+ if __name__ == "__main__":
276
+ demo.launch(share=True, debug=True)
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ gradio>=4.40.0
2
+ transformers>=4.43.3
3
+ sentence-transformers>=3.0.1
4
+ torch
5
+ requests
6
+ numpy