"""Gradio chat app that answers questions about a TV script via retrieval + LLM.

Pipeline (all executed at import time, as in the original script):
  1. Try to scrape the pilot transcript; fall back to a bundled excerpt.
  2. Parse ``CHARACTER: dialogue`` lines into a DataFrame.
  3. Build overlapping context windows and embed them.
  4. Serve a Gradio chat UI whose handler retrieves the closest windows and
     asks a hosted LLM to answer strictly from them.
"""

import os
import re

import gradio as gr
import pandas as pd
import requests
from bs4 import BeautifulSoup
from huggingface_hub import InferenceClient
from sentence_transformers import SentenceTransformer, util

print("🚀 Loading Script Intelligence Engine...")

# One utterance per line: the dialogue regex below is anchored at the start of
# a line, so the line breaks are significant.
fallback_script = """
[ Scene starts in FBI facility ]
RESSLER: He’s demanding to speak with you.
ELIZABETH: Me? Why me? I don't know him.
RESSLER: We don't know why. But we need to hear what he has to say.
[ Elizabeth walks into the interrogation room ]
REDDINGTON: You got rid of your highlights. You look much less... Baltimore.
ELIZABETH: You asked to see me. Why?
REDDINGTON: I'm a criminal, Lizzie. You're a profiler. You tell me.
ELIZABETH: I think you're bored. I think you're using us.
REDDINGTON: I'm going to make you famous. I have a list.
REDDINGTON: A blacklist of the worst criminals in the world.
REDDINGTON: Starting with Ranko Zamani.
REDDINGTON: He is in town, Lizzie.
REDDINGTON: And he plans to kidnap the general's daughter.
"""

url = "https://subslikescript.com/series/The_Blacklist-2741602/season-1/episode-1-Pilot"
headers = {'User-Agent': 'Mozilla/5.0'}

# Best-effort scrape: any failure (network, HTTP status, parsing) must not stop
# the app from starting, so a broad catch is deliberate at this boundary.
try:
    response = requests.get(url, headers=headers, timeout=8)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    script_div = soup.find('div', class_='full-script')
    if script_div:
        raw_text = script_div.get_text(separator="\n")
        print("✅ Live script loaded")
    else:
        # Page fetched but the expected container is missing; do not claim
        # that the live script was loaded.
        raw_text = fallback_script
        print("⚠️ Script container not found on page; using fallback script")
except Exception as e:
    print(f"⚠️ Scrape blocked: {e}")
    raw_text = fallback_script

print("🧠 Processing unstructured data...")

# An upper-case speaker label, a colon, then the spoken text.
dialogue_pattern = re.compile(r"^([A-Z\s]+):\s*(.*)")


def _parse_dialogue(text):
    """Return a list of {"Character", "Dialogue"} dicts for lines matching ``NAME: text``."""
    rows = []
    for line in text.split("\n"):
        line = line.strip()
        if not line:
            continue
        match = dialogue_pattern.match(line)
        if match:
            rows.append({
                "Character": match.group(1).strip(),
                "Dialogue": match.group(2).strip(),
            })
    return rows


structured_data = _parse_dialogue(raw_text)
if not structured_data:
    # The fetched text may carry no speaker labels at all; without rows the
    # DataFrame would have no "Character" column and the next step would raise.
    print("⚠️ No speaker-labelled dialogue found; using fallback script")
    raw_text = fallback_script
    structured_data = _parse_dialogue(raw_text)

# Explicit columns keep the frame well-formed even if it is empty.
df = pd.DataFrame(structured_data, columns=["Character", "Dialogue"])
df["Full_Line"] = df["Character"] + ": " + df["Dialogue"]

# Each chunk is a line plus `window_size` neighbours on either side, so a
# retrieved hit carries its immediate conversational context.
window_size = 1
context_chunks = []
for i in range(len(df)):
    start = max(0, i - window_size)
    end = min(len(df), i + window_size + 1)
    chunk = "\n".join(df["Full_Line"].iloc[start:end])
    context_chunks.append({"context_block": chunk})

ai_df = pd.DataFrame(context_chunks, columns=["context_block"])

print("⚡ Loading embedding model...")
model = SentenceTransformer("all-MiniLM-L6-v2")
vector_database = model.encode(ai_df["context_block"].tolist())

hf_token = os.environ.get("HF_TOKEN")
client = InferenceClient(model="meta-llama/Meta-Llama-3-8B-Instruct", token=hf_token)


def search_script(user_query, history):
    """Answer ``user_query`` from the embedded script and append the turn to ``history``.

    Args:
        user_query: The text typed by the user.
        history: Chat history as a list of ``{"role", "content"}`` dicts
            (the ``type="messages"`` Chatbot format). ``None`` is treated as
            an empty history.

    Returns:
        A ``(history, "")`` tuple: the updated history for the Chatbot and an
        empty string that clears the input Textbox. Blank queries return the
        history unchanged.

    Errors during retrieval or generation are not raised; they are rendered
    into the assistant message instead.
    """
    if history is None:
        history = []
    if not user_query or not user_query.strip():
        return history, ""

    # Set up-front so that a failure in the LLM call does not throw away
    # context that was already retrieved successfully.
    combined_context = "No context retrieved."
    try:
        query_vector = model.encode(user_query)
        hits = util.semantic_search(query_vector, vector_database, top_k=2)
        retrieved_contexts = [
            ai_df["context_block"].iloc[hit["corpus_id"]]
            for hit in hits[0]
        ]
        combined_context = "\n\n━━━━━━━━━━━━━━\n\n".join(retrieved_contexts)

        messages = [
            {
                "role": "system",
                "content": (
                    "You are an expert TV Script Analyst AI.\n\n"
                    "Rules:\n"
                    "- Answer ONLY from provided excerpts\n"
                    "- Never invent lore\n"
                    "- Explain naturally\n"
                    "- Be concise and smart"
                ),
            },
            {
                "role": "user",
                "content": f"Question:\n{user_query}\n\nScript Excerpts:\n{combined_context}",
            },
        ]

        response = client.chat.completions.create(
            messages=messages,
            max_tokens=220,
            temperature=0.25,
        )
        # `content` can be None on an empty completion; avoid AttributeError.
        ai_answer = (response.choices[0].message.content or "").strip()
    except Exception as e:
        ai_answer = f"❌ Error: {str(e)}"

    formatted_response = (
        f"## 🤖 AI Analysis\n\n{ai_answer}\n\n---\n\n"
        f"### 📚 Retrieved Script Context\n\n```text\n{combined_context}\n```\n"
    )

    history.append({"role": "user", "content": user_query})
    history.append({"role": "assistant", "content": formatted_response})
    return history, ""


custom_css = """
@import url('https://fonts.googleapis.com/css2?family=Orbitron:wght@500;700&family=Inter:wght@300;400;500;600&display=swap');

body {
    background: #050816 !important;
}

.gradio-container {
    background: radial-gradient(circle at top left, #101935 0%, #050816 60%, #02030a 100%) !important;
    color: white !important;
    font-family: 'Inter', sans-serif;
}

.main-title {
    text-align: center;
    font-size: 42px;
    font-family: 'Orbitron', sans-serif;
    font-weight: 700;
    background: linear-gradient(90deg, #00eaff, #6f8cff, #00eaff);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    text-shadow: 0 0 10px #00eaff, 0 0 20px #00eaff;
}

.subtitle {
    text-align: center;
    color: #9ab0ff;
    font-size: 17px;
    margin-top: -10px;
    margin-bottom: 25px;
}

.glass-chat {
    border-radius: 24px !important;
    background: rgba(255,255,255,0.06) !important;
    border: 1px solid rgba(255,255,255,0.08) !important;
    backdrop-filter: blur(20px);
    box-shadow: 0 0 35px rgba(0,255,255,0.12);
}

textarea {
    background: rgba(255,255,255,0.06) !important;
    color: white !important;
    border-radius: 18px !important;
    border: 1px solid rgba(0,255,255,0.2) !important;
    box-shadow: 0 0 15px rgba(0,255,255,0.1);
}

button {
    background: linear-gradient(135deg, #00d9ff, #6b63ff) !important;
    border: none !important;
    border-radius: 16px !important;
    color: white !important;
    font-weight: bold !important;
    box-shadow: 0 0 20px rgba(0,217,255,0.5) !important;
    transition: all 0.3s ease !important;
}

button:hover {
    transform: scale(1.04);
}

footer {
    display: none !important;
}
"""

with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
    # NOTE(review): the wrapper elements were reconstructed from the otherwise
    # unused `.main-title` / `.subtitle` CSS rules above - confirm the markup.
    gr.HTML("""
<div class="main-title">AI Narrative Intelligence Engine</div>
<div class="subtitle">Semantic Retrieval + LLM Reasoning on Unstructured TV Dialogue</div>
""")

    chatbot = gr.Chatbot(type="messages", height=600, elem_classes="glass-chat")

    with gr.Row():
        user_input = gr.Textbox(
            placeholder="Ask anything about the script...",
            lines=2,
            scale=8,
        )
        send_btn = gr.Button("🚀 Analyze", scale=1)

    gr.Examples(
        examples=[
            ["Who is Ranko Zamani?"],
            ["Why does Reddington want Elizabeth?"],
            ["What is the blacklist?"],
            ["What is Reddington offering?"],
        ],
        inputs=user_input,
    )

    # The button and the Enter key trigger the same handler.
    send_btn.click(fn=search_script, inputs=[user_input, chatbot], outputs=[chatbot, user_input])
    user_input.submit(fn=search_script, inputs=[user_input, chatbot], outputs=[chatbot, user_input])

demo.launch()