# app.py
# ============================================
# Netflix (KR) Recommender + Review Analyzer — Live TMDb with Posters
# - Uses TMDb API (env var: TMDB_API_KEY), with optional UI override
# - Gradio app suitable for Hugging Face Spaces (CPU-friendly)
# ============================================

import logging
import os
import time
import traceback
from itertools import zip_longest
from typing import Dict, Any, List, Tuple

import numpy as np
import requests
import gradio as gr

# Optional NLP models
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
from sentence_transformers import SentenceTransformer

logger = logging.getLogger(__name__)

# -----------------------------
# Config
# -----------------------------
TMDB_BASE = "https://api.themoviedb.org/3"
TMDB_IMG_BASE = "https://image.tmdb.org/t/p/w500"  # w500 is a good balance for gallery
DEFAULT_REGION = "KR"

# Provider id returned when the provider list does not contain the name.
NETFLIX_FALLBACK_ID = 8

# HTTP statuses worth retrying; other non-200 responses (bad key, unknown
# path, ...) will not change on a second attempt.
_RETRYABLE_STATUS = frozenset({429, 500, 502, 503, 504})
_MAX_ATTEMPTS = 3

# Radio labels shared by the UI and the callback so they cannot drift apart.
MODE_DISCOVER = "빠른 추천(Discover)"
MODE_SEARCH = "키워드 검색(정확)"

# Column order of the result table; the keys match build_gallery() rows.
TABLE_HEADERS = ["제목", "유형", "공개일", "TMDb평점", "개요"]


# Load lightweight NLP models (CPU)
def _load_models():
    """Load the sentiment, text2text and (optional) embedding models on CPU.

    Returns:
        A ``(sentiment_pipeline, text2text_pipeline, embedder)`` tuple.
        ``embedder`` is ``None`` when the sentence-transformers model could
        not be loaded; ranking then falls back to the incoming order.
    """
    # Sentiment (multilingual 1~5 stars)
    sent = pipeline(
        "sentiment-analysis",
        model="nlptown/bert-base-multilingual-uncased-sentiment",
        device=-1,
    )
    # T5 small for Korean one-liners
    # NOTE(review): flan-t5-small may handle Korean text poorly — verify the
    # quality of the generated one-liners before relying on them.
    tok = AutoTokenizer.from_pretrained("google/flan-t5-small")
    mdl = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-small")
    summer = pipeline("text2text-generation", model=mdl, tokenizer=tok, device=-1)
    # Embedding model for semantic ranking (multilingual)
    try:
        emb = SentenceTransformer(
            "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
        )
    except Exception:
        # Best effort: the app still works without semantic ranking.
        logger.warning("Embedding model unavailable; semantic ranking disabled",
                       exc_info=True)
        emb = None
    return sent, summer, emb


_sent, _summer, _emb = _load_models()


# -----------------------------
# TMDb helpers
# -----------------------------
def _redact(text: str, secret: str) -> str:
    """Return *text* with every occurrence of *secret* masked."""
    return text.replace(secret, "***") if secret else text


def tmdb_get(api_key: str, path: str, params: Dict[str, Any]) -> Dict[str, Any]:
    """GET a TMDb endpoint with a simple retry/backoff.

    Args:
        api_key: TMDb API key, sent as the ``api_key`` query parameter.
        path: Endpoint path appended to ``TMDB_BASE`` (e.g. ``/discover/movie``).
        params: Extra query parameters.

    Returns:
        The decoded JSON body of a 200 response.

    Raises:
        RuntimeError: After the attempts are exhausted or on a non-retryable
            status. The message never contains the API key.
    """
    url = f"{TMDB_BASE}{path}"
    query = {"api_key": api_key, **params}
    last_err = "unknown error"
    for attempt in range(_MAX_ATTEMPTS):
        try:
            resp = requests.get(url, params=query, timeout=25)
            if resp.status_code == 200:
                return resp.json()
        except (requests.RequestException, ValueError) as exc:
            # requests error messages embed the full URL, query string
            # included, so mask the key before it can reach logs or the UI.
            last_err = _redact(str(exc), api_key)
        else:
            last_err = f"{resp.status_code} {resp.text[:200]}"
            if resp.status_code not in _RETRYABLE_STATUS:
                break
        # Back off only when another attempt will actually follow.
        if attempt < _MAX_ATTEMPTS - 1:
            time.sleep(0.7 * (attempt + 1))
    raise RuntimeError(f"TMDb request failed: {last_err}")


def get_provider_id(api_key: str, region: str, provider_name="Netflix") -> int:
    """Fetch the provider list for *region* and return the id of *provider_name*.

    The name comparison is case-insensitive. Falls back to
    ``NETFLIX_FALLBACK_ID`` (8) when no provider matches.
    """
    data = tmdb_get(api_key, "/watch/providers/movie", {"watch_region": region})
    wanted = provider_name.lower()
    for item in data.get("results", []):
        if str(item.get("provider_name", "")).lower() != wanted:
            continue
        provider_id = item.get("provider_id")
        if provider_id is not None:
            return int(provider_id)
    return NETFLIX_FALLBACK_ID  # common fallback


def discover_quick(api_key: str, region: str, nfx_id: int, ctype="movie",
                   sort_by="popularity.desc", page_limit=2) -> List[Dict[str, Any]]:
    """Use TMDb Discover with the Netflix provider filter.

    Args:
        api_key: TMDb API key.
        region: Watch region code (e.g. ``"KR"``).
        nfx_id: Provider id to filter on.
        ctype: ``"movie"`` or ``"tv"``.
        sort_by: Discover sort key. ``release_date.*`` is rewritten to
            ``first_air_date.*`` for TV, which is the date sort key the TV
            endpoint documents.
        page_limit: Maximum number of result pages to fetch.

    Returns:
        The raw result dicts, each tagged with ``"type": ctype``.
    """
    if ctype == "tv" and sort_by.startswith("release_date."):
        sort_by = sort_by.replace("release_date.", "first_air_date.", 1)
    params = {
        "watch_region": region,
        "with_watch_providers": nfx_id,
        "sort_by": sort_by,
        # Sent as a lowercase string: requests would serialize the Python
        # bool as "False".
        "include_adult": "false",
        "language": "ko-KR",
    }
    rows: List[Dict[str, Any]] = []
    for page in range(1, page_limit + 1):
        data = tmdb_get(api_key, f"/discover/{ctype}", {**params, "page": page})
        rows.extend({**r, "type": ctype} for r in data.get("results", []))
        # Skip requests for pages the response says do not exist.
        total_pages = data.get("total_pages")
        if isinstance(total_pages, int) and page >= total_pages:
            break
    return rows


def has_netflix_offer(api_key: str, content_type: str, tmdb_id: int,
                      region: str, nfx_id: int) -> bool:
    """Check if a specific item is offered on Netflix in the region.

    Looks at the ``flatrate``, ``ads`` and ``free`` offer lists of the
    item's watch-provider entry for *region*.
    """
    data = tmdb_get(api_key, f"/{content_type}/{tmdb_id}/watch/providers", {})
    info = (data.get("results") or {}).get(region) or {}
    offers = []
    for kind in ("flatrate", "ads", "free"):
        offers.extend(info.get(kind) or [])
    return any(int(p.get("provider_id", -1)) == nfx_id for p in offers)


def search_and_filter(api_key: str, query: str, region: str, nfx_id: int,
                      content_types=("movie", "tv"), max_pages_each=2,
                      max_total=60) -> List[Dict[str, Any]]:
    """Search TMDb by *query* and keep only titles offered by the provider.

    1) Search movie/tv by query
    2) Validate the Netflix provider for each hit

    Returns:
        At most *max_total* result dicts, each tagged with ``"type"``.
        Items whose provider lookup fails are skipped (best effort).
    """
    matches: List[Dict[str, Any]] = []
    for ctype in content_types:
        for page in range(1, max_pages_each + 1):
            # Return (not break) so the cap also holds across content types.
            if len(matches) >= max_total:
                return matches
            data = tmdb_get(api_key, f"/search/{ctype}", {
                "query": query,
                "page": page,
                "include_adult": "false",
                "language": "ko-KR",
            })
            for item in data.get("results", []):
                tmdb_id = item.get("id")
                if tmdb_id is None:
                    continue
                try:
                    offered = has_netflix_offer(api_key, ctype, tmdb_id,
                                                region, nfx_id)
                except Exception:
                    # One failed lookup should not abort the whole search.
                    logger.warning("Provider lookup failed for %s/%s",
                                   ctype, tmdb_id, exc_info=True)
                    continue
                if offered:
                    matches.append({**item, "type": ctype})
                    if len(matches) >= max_total:
                        return matches
    return matches


# -----------------------------
# Ranking & formatting
# -----------------------------
def _embed_texts(texts: List[str]) -> np.ndarray:
    """Return L2-normalized embeddings for *texts*, one row per text.

    When no embedding model is loaded (or *texts* is empty) a zero matrix
    with 384 columns is returned instead.
    """
    if _emb is None or not texts:
        return np.zeros((len(texts), 384), dtype=np.float32)
    return _emb.encode(texts, normalize_embeddings=True, convert_to_numpy=True,
                       show_progress_bar=False)


def _display_title(item: Dict[str, Any]) -> str:
    """Return the TV ``name`` or movie ``title`` of a TMDb result dict."""
    return item.get("name") or item.get("title") or ""


def rank_by_query(items: List[Dict[str, Any]], query: str,
                  topk: int = 10) -> List[Dict[str, Any]]:
    """Return the *topk* items most similar to *query*.

    Similarity is the dot product between the normalized embeddings of the
    query and of ``"<title>. <overview>"``. Without a query or an embedding
    model the first *topk* items are returned in their incoming order.
    """
    topk = int(topk)  # UI sliders may deliver a float
    if not items:
        return []
    if not query or not query.strip() or _emb is None:
        return items[:topk]
    texts = [f"{_display_title(it)}. {it.get('overview') or ''}" for it in items]
    q_vec = _embed_texts([query])[0]
    sims = _embed_texts(texts) @ q_vec
    # Stable sort keeps the incoming order among equally similar items.
    order = np.argsort(-sims, kind="stable")[:topk]
    return [items[int(i)] for i in order]


def build_gallery(items: List[Dict[str, Any]]) -> Tuple[list, list]:
    """Return ``(gallery_items, table_rows)`` for *items*.

    ``gallery_items`` is a list of ``(image_url, caption)`` pairs; items
    without a poster are left out of the gallery because it cannot render an
    empty image. ``table_rows`` holds one dict per item, posterless or not.
    """
    gallery = []
    rows = []
    for it in items:
        title = _display_title(it)
        overview = it.get("overview") or ""
        date = it.get("first_air_date") or it.get("release_date") or ""
        vote = it.get("vote_average")
        ctype = "드라마" if it.get("type") == "tv" else "영화"
        poster = it.get("poster_path")
        if poster:
            ellipsis = "..." if len(overview) > 120 else ""
            cap = (f"{title} ({ctype})\n평점: {vote} | 공개: {date}\n"
                   f"{overview[:120]}{ellipsis}")
            gallery.append((f"{TMDB_IMG_BASE}{poster}", cap))
        rows.append({"제목": title, "유형": ctype, "공개일": date,
                     "TMDb평점": vote, "개요": overview})
    return gallery, rows


def _interleave(groups: List[List[Dict[str, Any]]]) -> List[Dict[str, Any]]:
    """Round-robin merge of *groups*, preserving the order inside each one."""
    missing = object()
    merged = []
    for bundle in zip_longest(*groups, fillvalue=missing):
        merged.extend(x for x in bundle if x is not missing)
    return merged


def _rows_to_table(rows: List[Dict[str, Any]]) -> List[list]:
    """Convert build_gallery() row dicts to a list of lists in header order."""
    return [[row.get(col, "") for col in TABLE_HEADERS] for row in rows]


# -----------------------------
# Business logic (callbacks)
# -----------------------------
STAR_MAP = {1: "매우 부정", 2: "부정", 3: "중립", 4: "긍정", 5: "매우 긍정"}


def do_recommend(api_key_ui: str, query: str, region: str, mode: str, topk: int,
                 sort_by: str, include_movie: bool, include_tv: bool):
    """Gradio callback: fetch, rank and format provider titles.

    Args:
        api_key_ui: Key typed in the UI; falls back to env ``TMDB_API_KEY``.
        query: Optional free-text keywords used for search and ranking.
        region: Watch region code.
        mode: ``MODE_DISCOVER`` for Discover, anything else for keyword search.
        topk: Number of titles to show.
        sort_by: Discover sort key (Discover mode only).
        include_movie: Include movies.
        include_tv: Include TV shows (both are used when neither is checked).

    Returns:
        ``(markdown, gallery_items, table_rows)``; on failure the markdown
        carries the error message and the other two values are ``None``.
    """
    api_key = ""
    try:
        api_key = ((api_key_ui or "").strip()
                   or os.environ.get("TMDB_API_KEY", "").strip())
        if not api_key:
            return "TMDb API Key를 입력하거나 환경변수 TMDB_API_KEY를 설정하세요.", None, None

        query = (query or "").strip()
        topk = int(topk)
        nfx_id = get_provider_id(api_key, region, "Netflix")

        selected = (("movie", include_movie), ("tv", include_tv))
        types = [name for name, wanted in selected if wanted] or ["movie", "tv"]

        # Fetch
        if mode == MODE_DISCOVER:
            # Interleave the per-type lists so that, without a query, the
            # first `topk` entries are not all of one type.
            items = _interleave([
                discover_quick(api_key, region, nfx_id, ctype=t,
                               sort_by=sort_by, page_limit=2)
                for t in types
            ])
        else:
            items = search_and_filter(api_key, query or "Netflix", region, nfx_id,
                                      content_types=tuple(types),
                                      max_pages_each=2, max_total=80)

        if not items:
            return f"조건에 맞는 넷플릭스({region}) 작품을 찾지 못했습니다.", None, None

        ranked = rank_by_query(items, query, topk=topk)
        gallery, rows = build_gallery(ranked)

        # One-line pitch for top1
        top = ranked[0]
        top_title = _display_title(top)
        pitch_prompt = (
            "Summarize in Korean (1-2 sentences):\n"
            f"사용자 취향/키워드: {query}\n"
            f"작품: {top_title} / 개요: {top.get('overview','')}"
        )
        # truncation=True keeps long overviews within the model's input limit.
        pitch = _summer(pitch_prompt, max_new_tokens=80, do_sample=False,
                        truncation=True)[0]["generated_text"]

        md = (f"### ✅ 추천 결과 (Region={region}, Provider=Netflix)\n"
              f"- Top 1: **{top_title}** — {pitch}")
        return md, gallery, _rows_to_table(rows)
    except Exception as e:
        # The traceback goes to the server log only; the UI gets a short,
        # key-free message.
        logger.exception("do_recommend failed")
        return f"[오류] {_redact(str(e), api_key)}", None, None


def analyze_review(title: str, review: str):
    """Gradio callback: predict a 1-5 star rating and a one-line summary.

    Returns:
        ``(headline, summary)`` strings; on failure the headline carries the
        error message and the summary is empty.
    """
    try:
        if not review or not review.strip():
            return "감상평을 입력해 주세요.", ""
        # truncation=True: reviews longer than the model limit would
        # otherwise fail inside the model.
        res = _sent(review, truncation=True)[0]
        stars = int(res["label"][0])  # first character of the label is the star count
        head = (f"예측 별점: {stars} ({STAR_MAP.get(stars,'중립')}) / "
                f"확신도: {float(res['score']):.3f}")
        summ = _summer(
            f"Summarize in Korean (1 sentence):\n제목: {title}\n감상평: {review}",
            max_new_tokens=60, do_sample=False, truncation=True
        )[0]["generated_text"]
        return head, f"한줄평: {summ}"
    except Exception as e:
        logger.exception("analyze_review failed")
        return f"[오류] {e}", ""


# -----------------------------
# Gradio UI
# -----------------------------
with gr.Blocks() as demo:
    gr.Markdown("## 🍿 실시간 넷플릭스(KR) 추천 & 감상평 — TMDb API + 포스터 이미지")

    with gr.Accordion("TMDb API 설정", open=True):
        api_key = gr.Textbox(
            label="TMDb API Key (UI 입력은 선택, 기본은 환경변수 TMDB_API_KEY 사용)",
            type="password",
        )
        region = gr.Dropdown(choices=["KR", "US", "JP", "GB", "DE", "FR", "ES"],
                             value=DEFAULT_REGION, label="지역(Watch Region)")

    with gr.Tab("추천"):
        query = gr.Textbox(label="키워드/기분(선택)",
                           placeholder="예) 따뜻한 성장 드라마, 무서운 한국 스릴러",
                           lines=2)
        with gr.Row():
            mode = gr.Radio(choices=[MODE_DISCOVER, MODE_SEARCH],
                            value=MODE_DISCOVER, label="검색 모드")
            sort_by = gr.Dropdown(
                choices=["popularity.desc", "vote_average.desc", "release_date.desc"],
                value="popularity.desc", label="정렬(Discover용)",
            )
            topk = gr.Slider(3, 20, value=9, step=1, label="표시 개수")
        with gr.Row():
            include_movie = gr.Checkbox(value=True, label="영화 포함")
            include_tv = gr.Checkbox(value=True, label="드라마 포함")
        btn = gr.Button("추천 받기")

        out_md = gr.Markdown()
        out_gallery = gr.Gallery(label="포스터 갤러리", columns=3, height="auto",
                                 allow_preview=True)
        # Headers are declared here because the callback returns plain rows.
        out_table = gr.Dataframe(headers=TABLE_HEADERS, interactive=False, wrap=True)

        btn.click(
            do_recommend,
            inputs=[api_key, query, region, mode, topk, sort_by,
                    include_movie, include_tv],
            outputs=[out_md, out_gallery, out_table]
        )

    with gr.Tab("감상평 분석"):
        title = gr.Textbox(label="제목(선택)", placeholder="추천 탭에서 복사해 붙여넣기")
        review = gr.Textbox(label="감상평", lines=5,
                            placeholder="예) 초반은 늘어지지만, 배우 연기가 압권이에요.")
        b2 = gr.Button("분석")
        head = gr.Markdown()
        summ = gr.Markdown()
        b2.click(analyze_review, inputs=[title, review], outputs=[head, summ])

# Expose demo for Spaces
app = demo

if __name__ == "__main__":
    demo.launch(share=True, debug=True)