# test1 / app.py
# kimddol's picture
# Upload 2 files
# 6cb1584 verified
# app.py
# ============================================
# Netflix (KR) Recommender + Review Analyzer — Live TMDb with Posters
# - Uses TMDb API (env var: TMDB_API_KEY), with optional UI override
# - Gradio app suitable for Hugging Face Spaces (CPU-friendly)
# ============================================
import os
import time
import requests
import traceback
from typing import Dict, Any, List, Tuple
import numpy as np
import gradio as gr
# Optional NLP models
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
from sentence_transformers import SentenceTransformer
# -----------------------------
# Config
# -----------------------------
# Root of the TMDb v3 REST API; tmdb_get() appends the endpoint path to it.
TMDB_BASE = "https://api.themoviedb.org/3"
# Prefix for poster URLs; build_gallery() appends each item's poster_path.
TMDB_IMG_BASE = "https://image.tmdb.org/t/p/w500" # w500 is a good balance for gallery
# Watch region preselected in the UI's region dropdown.
DEFAULT_REGION = "KR"
# Load lightweight NLP models (CPU)
def _load_models():
    """Create the CPU-only NLP components shared by the callbacks.

    Returns:
        A ``(sentiment, summarizer, embedder)`` tuple:

        * ``sentiment`` - ``sentiment-analysis`` pipeline backed by
          ``nlptown/bert-base-multilingual-uncased-sentiment``.
        * ``summarizer`` - ``text2text-generation`` pipeline backed by
          ``google/flan-t5-small``.
        * ``embedder`` - multilingual ``SentenceTransformer`` used for
          semantic ranking, or ``None`` when it could not be loaded.
    """
    # device=-1 keeps every pipeline on the CPU.
    sentiment = pipeline(
        "sentiment-analysis",
        model="nlptown/bert-base-multilingual-uncased-sentiment",
        device=-1,
    )

    # Small seq2seq model used to generate the short Korean one-liners.
    t5_name = "google/flan-t5-small"
    t5_tokenizer = AutoTokenizer.from_pretrained(t5_name)
    t5_model = AutoModelForSeq2SeqLM.from_pretrained(t5_name)
    summarizer = pipeline(
        "text2text-generation",
        model=t5_model,
        tokenizer=t5_tokenizer,
        device=-1,
    )

    # The embedder is optional: ranking falls back to the incoming order
    # when it is unavailable, so a load failure must not abort start-up.
    try:
        embedder = SentenceTransformer(
            "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
        )
    except Exception:
        embedder = None

    return sentiment, summarizer, embedder
# Load the models once at import time; the callbacks below read these
# module-level handles (_emb may be None when the embedder failed to load).
_sent, _summer, _emb = _load_models()
# -----------------------------
# TMDb helpers
# -----------------------------
def tmdb_get(api_key: str, path: str, params: Dict[str, Any]) -> Dict[str, Any]:
    """GET a TMDb endpoint and return the decoded JSON body.

    Up to three attempts are made, pausing 0.7 s and then 1.4 s between
    them. Client errors (4xx other than 429) are not retried because the
    same request would fail again.

    Args:
        api_key: TMDb API key, sent as the ``api_key`` query parameter.
        path: Endpoint path appended to ``TMDB_BASE``.
        params: Extra query parameters; an ``api_key`` entry here overrides
            *api_key*.

    Returns:
        The parsed JSON of the first attempt answered with HTTP 200.

    Raises:
        RuntimeError: If no attempt succeeded. The message carries the last
            status/body or exception text with the API key redacted.
    """
    url = f"{TMDB_BASE}{path}"
    query = {"api_key": api_key, **params}
    max_attempts = 3
    last_err = None
    for attempt in range(max_attempts):
        try:
            resp = requests.get(url, params=query, timeout=25)
            if resp.status_code == 200:
                return resp.json()
            last_err = f"{resp.status_code} {resp.text[:200]}"
            # A bad key (401) or unknown id (404) will not fix itself on a
            # retry; only rate limiting (429) and 5xx are worth waiting for.
            if 400 <= resp.status_code < 500 and resp.status_code != 429:
                break
        except Exception as exc:
            # Broad on purpose: callers rely on RuntimeError being the only
            # failure they see, whatever went wrong (network, bad JSON, ...).
            last_err = str(exc)
        # Back off only when another attempt will actually follow.
        if attempt < max_attempts - 1:
            time.sleep(0.7 * (attempt + 1))
    # Exception text produced by the HTTP layer can embed the full request
    # URL, query string included; scrub the key before the message reaches
    # logs or the UI.
    detail = str(last_err)
    if api_key:
        detail = detail.replace(api_key, "***")
    raise RuntimeError(f"TMDb request failed: {detail}")
def get_provider_id(api_key: str, region: str, provider_name="Netflix") -> int:
    """Resolve a watch provider's TMDb id for the given region.

    The movie provider list for *region* is fetched and searched for an entry
    whose ``provider_name`` equals *provider_name*, ignoring case.

    Returns:
        The matching ``provider_id`` as an int, or ``8`` when no entry
        matches.
    """
    listing = tmdb_get(api_key, "/watch/providers/movie", {"watch_region": region})
    matching_ids = (
        int(entry["provider_id"])
        for entry in listing.get("results", [])
        if str(entry.get("provider_name", "")).lower() == provider_name.lower()
    )
    # 8 is the fallback id used when the provider is not listed.
    return next(matching_ids, 8)
def discover_quick(api_key: str, region: str, nfx_id: int, ctype="movie",
                   sort_by="popularity.desc", page_limit=2) -> List[Dict[str, Any]]:
    """Fetch Discover results restricted to one watch provider.

    Args:
        api_key: TMDb API key.
        region: Watch region code sent as ``watch_region``.
        nfx_id: Provider id sent as ``with_watch_providers``.
        ctype: ``"movie"`` or ``"tv"``; selects ``/discover/{ctype}``.
        sort_by: Discover sort key. For ``ctype="tv"`` a ``release_date`` /
            ``primary_release_date`` key is rewritten to ``first_air_date``,
            which is the date field the TV endpoint sorts on.
        page_limit: Number of result pages to request, starting at page 1.

    Returns:
        The raw result dicts of all requested pages, each extended with a
        ``"type"`` key holding *ctype*.

    Raises:
        RuntimeError: Propagated from ``tmdb_get`` when a request fails.
    """
    if ctype == "tv":
        # The UI offers movie-style date sorting for both content types;
        # translate it so TV results are really ordered by air date.
        field, dot, direction = str(sort_by).partition(".")
        if field in ("release_date", "primary_release_date"):
            sort_by = f"first_air_date{dot}{direction}"

    params = {
        "watch_region": region,
        "with_watch_providers": nfx_id,
        "sort_by": sort_by,
        "include_adult": False,
        "language": "ko-KR",
    }
    rows: List[Dict[str, Any]] = []
    for page in range(1, page_limit + 1):
        data = tmdb_get(api_key, f"/discover/{ctype}", {**params, "page": page})
        rows.extend({"type": ctype, **r} for r in data.get("results", []))
    return rows
def has_netflix_offer(api_key: str, content_type: str, tmdb_id: int, region: str, nfx_id: int) -> bool:
    """Tell whether one title is offered by provider *nfx_id* in *region*.

    The title's ``watch/providers`` data is fetched and the region's
    ``flatrate``, ``ads`` and ``free`` offers are scanned for the provider id.
    Other offer kinds in the response are not considered.
    """
    payload = tmdb_get(api_key, f"/{content_type}/{tmdb_id}/watch/providers", {})
    regional = payload.get("results", {}).get(region, {})
    offers = regional.get("flatrate", []) + regional.get("ads", []) + regional.get("free", [])
    for offer in offers:
        if int(offer.get("provider_id", -1)) == nfx_id:
            return True
    return False
def search_and_filter(api_key: str, query: str, region: str, nfx_id: int,
                      content_types=("movie","tv"), max_pages_each=2, max_total=60) -> List[Dict[str,Any]]:
    """Search TMDb by text and keep only titles offered by the provider.

    1) Search each content type by *query*.
    2) Validate the provider offer for every hit with ``has_netflix_offer``.

    Args:
        api_key: TMDb API key.
        query: Free-text search string.
        region: Watch region code used for the provider check.
        nfx_id: Provider id a title must be offered by.
        content_types: Content types to search, in order.
        max_pages_each: Maximum search pages requested per content type.
        max_total: Hard cap on the number of returned items; no further
            request is issued once it is reached.

    Returns:
        Matching result dicts, each extended with a ``"type"`` key.

    Raises:
        RuntimeError: Propagated from ``tmdb_get`` when a search request
            fails. Failures of individual provider checks are skipped.
    """
    out: List[Dict[str, Any]] = []
    for ctype in content_types:
        for page in range(1, max_pages_each + 1):
            # Enforce the cap before spending another search request.
            if len(out) >= max_total:
                return out
            data = tmdb_get(api_key, f"/search/{ctype}", {
                "query": query, "page": page, "include_adult": False, "language": "ko-KR"
            })
            for item in data.get("results", []):
                if len(out) >= max_total:
                    return out
                tmdb_id = item.get("id")
                if tmdb_id is None:
                    # Without an id the provider lookup cannot be made.
                    continue
                try:
                    offered = has_netflix_offer(api_key, ctype, tmdb_id, region, nfx_id)
                except Exception:
                    # Best effort: one failed provider lookup must not abort
                    # the whole search, so the title is simply left out.
                    continue
                if offered:
                    out.append({"type": ctype, **item})
            # Stop paging once the last available page has been consumed.
            total_pages = data.get("total_pages")
            if isinstance(total_pages, int) and page >= total_pages:
                break
    return out
# -----------------------------
# Ranking & formatting
# -----------------------------
def _embed_texts(texts: List[str]) -> np.ndarray:
    """Encode *texts* with the shared embedder.

    Returns normalized embeddings from ``_emb.encode``. When the embedder is
    unavailable or *texts* is empty, a float32 zero matrix of shape
    ``(len(texts), 384)`` is returned instead.
    """
    if _emb is not None and texts:
        return _emb.encode(
            texts,
            normalize_embeddings=True,
            convert_to_numpy=True,
            show_progress_bar=False,
        )
    return np.zeros((len(texts), 384), dtype=np.float32)
def rank_by_query(items: List[Dict[str, Any]], query: str, topk: int = 10) -> List[Dict[str, Any]]:
    """Return the *topk* items most similar to *query*.

    Each item is represented by ``"<title>. <overview>"`` and compared with
    the query through the dot product of normalized embeddings. Without a
    usable query or embedder the first *topk* items are returned in their
    incoming order.
    """
    if not items:
        return []

    has_query = bool(query and query.strip())
    if not has_query or _emb is None:
        return items[:topk]

    corpus = [
        f"{entry.get('name') or entry.get('title') or ''}. {entry.get('overview') or ''}"
        for entry in items
    ]
    query_vec = _emb.encode([query], normalize_embeddings=True, convert_to_numpy=True)[0].reshape(1, -1)
    doc_vecs = _emb.encode(corpus, normalize_embeddings=True, convert_to_numpy=True)

    # Vectors are normalized, so the dot product is the cosine similarity.
    scores = (query_vec @ doc_vecs.T)[0]
    best_first = np.argsort(-scores)[:topk]
    return [items[pos] for pos in best_first]
def build_gallery(items: List[Dict[str, Any]]) -> Tuple[list, list]:
    """Turn TMDb result dicts into gallery entries and table rows.

    Args:
        items: Result dicts. The keys read are ``name``/``title``,
            ``overview``, ``first_air_date``/``release_date``,
            ``vote_average``, ``type`` and ``poster_path``; missing keys are
            tolerated.

    Returns:
        ``(gallery_items, table_rows)``. ``gallery_items`` is a list of
        ``[image_url, caption]`` pairs, one per item that has a poster.
        ``table_rows`` is a list of dicts, one per item in input order.
    """
    gallery = []
    rows = []
    for it in items:
        title = it.get("name") or it.get("title") or ""
        overview = it.get("overview") or ""
        date = it.get("first_air_date") or it.get("release_date") or ""
        vote = it.get("vote_average")
        ctype = "๋“œ๋ผ๋งˆ" if it.get("type") == "tv" else "์˜ํ™”"
        rows.append({"์ œ๋ชฉ": title, "์œ ํ˜•": ctype, "๊ณต๊ฐœ์ผ": date, "TMDbํ‰์ ": vote, "๊ฐœ์š”": overview})

        poster = it.get("poster_path")
        if not poster:
            # gr.Gallery cannot render a None image and rejects the whole
            # update, so poster-less titles appear in the table only.
            continue
        # Captions show at most the first 120 characters of the overview.
        preview = overview[:120] + ("..." if len(overview) > 120 else "")
        cap = f"{title} ({ctype})\nํ‰์ : {vote} | ๊ณต๊ฐœ: {date}\n{preview}"
        gallery.append([f"{TMDB_IMG_BASE}{poster}", cap])
    return gallery, rows
# -----------------------------
# Business logic (callbacks)
# -----------------------------
# Display label for each predicted star rating (1-5); looked up by
# analyze_review(), which falls back to the 3-star label for unknown keys.
STAR_MAP = {1:"๋งค์šฐ ๋ถ€์ •", 2:"๋ถ€์ •", 3:"์ค‘๋ฆฝ", 4:"๊ธ์ •", 5:"๋งค์šฐ ๊ธ์ •"}
def do_recommend(api_key_ui: str, query: str, region: str, mode: str, topk: int,
                 sort_by: str, include_movie: bool, include_tv: bool):
    """Gradio callback: fetch, rank and present provider-filtered titles.

    Args:
        api_key_ui: API key typed in the UI; when blank the ``TMDB_API_KEY``
            environment variable is used.
        query: Optional keywords used for search and semantic ranking.
        region: Watch region code.
        mode: Selected search mode; the Discover label uses Discover, any
            other value uses keyword search with a provider check.
        topk: Number of titles to show.
        sort_by: Discover sort key.
        include_movie: Include movies.
        include_tv: Include TV shows. With neither flag set both are used.

    Returns:
        ``(markdown, gallery, table)``. ``gallery`` and ``table`` are
        ``None`` when there is nothing to show or an error occurred; in the
        error case ``markdown`` holds the error text and traceback with the
        API key redacted.
    """
    api_key = ""
    try:
        api_key = (api_key_ui or "").strip() or os.environ.get("TMDB_API_KEY", "").strip()
        if not api_key:
            return "TMDb API Key๋ฅผ ์ž…๋ ฅํ•˜๊ฑฐ๋‚˜ ํ™˜๊ฒฝ๋ณ€์ˆ˜ TMDB_API_KEY๋ฅผ ์„ค์ •ํ•˜์„ธ์š”.", None, None

        # The slider value is used as a slice bound, so it must be an int;
        # at least one title is needed for the "Top 1" pitch below.
        topk = max(1, int(topk))

        nfx_id = get_provider_id(api_key, region, "Netflix")

        types = []
        if include_movie:
            types.append("movie")
        if include_tv:
            types.append("tv")
        if not types:
            types = ["movie", "tv"]

        # Fetch
        if mode == "๋น ๋ฅธ ์ถ”์ฒœ(Discover)":
            items = []
            for t in types:
                items.extend(discover_quick(api_key, region, nfx_id, ctype=t, sort_by=sort_by, page_limit=2))
        else:
            items = search_and_filter(api_key, query or "Netflix", region, nfx_id,
                                      content_types=tuple(types), max_pages_each=2, max_total=80)
        if not items:
            return f"์กฐ๊ฑด์— ๋งž๋Š” ๋„ทํ”Œ๋ฆญ์Šค({region}) ์ž‘ํ’ˆ์„ ์ฐพ์ง€ ๋ชปํ–ˆ์Šต๋‹ˆ๋‹ค.", None, None

        ranked = rank_by_query(items, query, topk=topk)
        gallery, rows = build_gallery(ranked)

        # One-line pitch for top1
        t = ranked[0]
        top_title = (t.get("name") or t.get("title") or "")
        pitch_prompt = (
            "Summarize in Korean (1-2 sentences):\n"
            f"์‚ฌ์šฉ์ž ์ทจํ–ฅ/ํ‚ค์›Œ๋“œ: {query}\n"
            f"์ž‘ํ’ˆ: {top_title} / ๊ฐœ์š”: {t.get('overview','')}"
        )
        pitch = _summer(pitch_prompt, max_new_tokens=80, do_sample=False)[0]["generated_text"]
        md = f"### โœ… ์ถ”์ฒœ ๊ฒฐ๊ณผ (Region={region}, Provider=Netflix)\n- Top 1: **{top_title}** โ€” {pitch}"

        # Hand the table to gr.Dataframe in its headers/data form rather
        # than as a list of dicts, which is not one of its documented inputs.
        table = {
            "headers": list(rows[0].keys()) if rows else [],
            "data": [list(row.values()) for row in rows],
        }
        return md, gallery, table
    except Exception as e:
        report = f"[์˜ค๋ฅ˜] {e}\n{traceback.format_exc()}"
        # This text is rendered to whoever uses the app; never echo the key
        # (it may be the deployment's own secret from the environment).
        if api_key:
            report = report.replace(api_key, "***")
        return report, None, None
def analyze_review(title: str, review: str):
    """Gradio callback: rate a review's sentiment and summarize it.

    Args:
        title: Optional title, included in the summarization prompt.
        review: Review text to analyze.

    Returns:
        ``(headline, summary)`` strings. For a blank review the headline
        asks for input and the summary is empty. On failure the headline
        holds the error text with traceback and the summary is empty.
    """
    try:
        if not review or not review.strip():
            return "๊ฐ์ƒํ‰์„ ์ž…๋ ฅํ•ด ์ฃผ์„ธ์š”.", ""
        # Reviews are free text of any length; truncate to the model's
        # maximum input so a long review cannot overflow the classifier.
        res = _sent(review, truncation=True)[0]
        # The star count is parsed from the first character of the label.
        stars = int(res["label"][0])
        head = f"์˜ˆ์ธก ๋ณ„์ : {stars} ({STAR_MAP.get(stars,'์ค‘๋ฆฝ')}) / ํ™•์‹ ๋„: {float(res['score']):.3f}"
        summ = _summer(
            f"Summarize in Korean (1 sentence):\n์ œ๋ชฉ: {title}\n๊ฐ์ƒํ‰: {review}",
            max_new_tokens=60, do_sample=False
        )[0]["generated_text"]
        return head, f"ํ•œ์ค„ํ‰: {summ}"
    except Exception as e:
        return f"[์˜ค๋ฅ˜] {e}\n{traceback.format_exc()}", ""
# -----------------------------
# Gradio UI
# -----------------------------
# NOTE(review): the component nesting below is reconstructed from a source
# whose indentation was lost - confirm the layout against the deployed app.
with gr.Blocks() as demo:
    gr.Markdown("## ๐Ÿฟ ์‹ค์‹œ๊ฐ„ ๋„ทํ”Œ๋ฆญ์Šค(KR) ์ถ”์ฒœ & ๊ฐ์ƒํ‰ โ€” TMDb API + ํฌ์Šคํ„ฐ ์ด๋ฏธ์ง€")

    # Settings shared by the recommendation callback: API key and region.
    with gr.Accordion("TMDb API ์„ค์ •", open=True):
        api_key = gr.Textbox(
            label="TMDb API Key (UI ์ž…๋ ฅ์€ ์„ ํƒ, ๊ธฐ๋ณธ์€ ํ™˜๊ฒฝ๋ณ€์ˆ˜ TMDB_API_KEY ์‚ฌ์šฉ)",
            type="password",
        )
        region = gr.Dropdown(
            choices=["KR","US","JP","GB","DE","FR","ES"],
            value=DEFAULT_REGION,
            label="์ง€์—ญ(Watch Region)",
        )

    # Tab 1: recommendations (inputs -> do_recommend -> markdown/gallery/table).
    with gr.Tab("์ถ”์ฒœ"):
        query = gr.Textbox(
            label="ํ‚ค์›Œ๋“œ/๊ธฐ๋ถ„(์„ ํƒ)",
            placeholder="์˜ˆ) ๋”ฐ๋œปํ•œ ์„ฑ์žฅ ๋“œ๋ผ๋งˆ, ๋ฌด์„œ์šด ํ•œ๊ตญ ์Šค๋ฆด๋Ÿฌ",
            lines=2,
        )
        with gr.Row():
            mode = gr.Radio(
                choices=["๋น ๋ฅธ ์ถ”์ฒœ(Discover)", "ํ‚ค์›Œ๋“œ ๊ฒ€์ƒ‰(์ •ํ™•)"],
                value="๋น ๋ฅธ ์ถ”์ฒœ(Discover)",
                label="๊ฒ€์ƒ‰ ๋ชจ๋“œ",
            )
            sort_by = gr.Dropdown(
                choices=["popularity.desc","vote_average.desc","release_date.desc"],
                value="popularity.desc",
                label="์ •๋ ฌ(Discover์šฉ)",
            )
            topk = gr.Slider(3, 20, value=9, step=1, label="ํ‘œ์‹œ ๊ฐœ์ˆ˜")
        with gr.Row():
            include_movie = gr.Checkbox(value=True, label="์˜ํ™” ํฌํ•จ")
            include_tv = gr.Checkbox(value=True, label="๋“œ๋ผ๋งˆ ํฌํ•จ")
        btn = gr.Button("์ถ”์ฒœ ๋ฐ›๊ธฐ")
        out_md = gr.Markdown()
        out_gallery = gr.Gallery(
            label="ํฌ์Šคํ„ฐ ๊ฐค๋Ÿฌ๋ฆฌ",
            columns=3,
            height="auto",
            allow_preview=True,
        )
        out_table = gr.Dataframe(interactive=False, wrap=True)
        # The input order must match do_recommend's parameter order.
        btn.click(
            do_recommend,
            inputs=[api_key, query, region, mode, topk, sort_by, include_movie, include_tv],
            outputs=[out_md, out_gallery, out_table],
        )

    # Tab 2: review analysis (title + review -> analyze_review).
    with gr.Tab("๊ฐ์ƒํ‰ ๋ถ„์„"):
        title = gr.Textbox(
            label="์ œ๋ชฉ(์„ ํƒ)",
            placeholder="์ถ”์ฒœ ํƒญ์—์„œ ๋ณต์‚ฌํ•ด ๋ถ™์—ฌ๋„ฃ๊ธฐ",
        )
        review = gr.Textbox(
            label="๊ฐ์ƒํ‰",
            lines=5,
            placeholder="์˜ˆ) ์ดˆ๋ฐ˜์€ ๋Š˜์–ด์ง€์ง€๋งŒ, ๋ฐฐ์šฐ ์—ฐ๊ธฐ๊ฐ€ ์••๊ถŒ์ด์—์š”.",
        )
        b2 = gr.Button("๋ถ„์„")
        head = gr.Markdown()
        summ = gr.Markdown()
        b2.click(analyze_review, inputs=[title, review], outputs=[head, summ])

# Expose demo for Spaces
app = demo

if __name__ == "__main__":
    demo.launch(share=True, debug=True)