Spaces:

Atulkumar001
/

Conversational-RAG-Agent

Runtime error

File size: 6,764 Bytes

import pandas as pd
import requests
from bs4 import BeautifulSoup
import re
from sentence_transformers import SentenceTransformer, util
from huggingface_hub import InferenceClient
import gradio as gr
import os

print("🚀 Loading Script Intelligence Engine...")

# Bundled transcript excerpt used whenever the live page cannot be fetched or
# parsed. Dialogue lines follow the `SPEAKER: text` shape that the parsing
# stage looks for; bracketed lines are stage directions.
fallback_script = """
[ Scene starts in FBI facility ]
RESSLER: He’s demanding to speak with you.
ELIZABETH: Me? Why me? I don't know him.
RESSLER: We don't know why. But we need to hear what he has to say.
[ Elizabeth walks into the interrogation room ]
REDDINGTON: You got rid of your highlights. You look much less... Baltimore.
ELIZABETH: You asked to see me. Why?
REDDINGTON: I'm a criminal, Lizzie. You're a profiler. You tell me.
ELIZABETH: I think you're bored. I think you're using us.
REDDINGTON: I'm going to make you famous. I have a list.
REDDINGTON: A blacklist of the worst criminals in the world.
REDDINGTON: Starting with Ranko Zamani.
REDDINGTON: He is in town, Lizzie.
REDDINGTON: And he plans to kidnap the general's daughter.
"""

# Source page for the live transcript and the request headers sent with it.
url = "https://subslikescript.com/series/The_Blacklist-2741602/season-1/episode-1-Pilot"
# A browser-like User-Agent is sent instead of the `requests` default.
headers = {'User-Agent': 'Mozilla/5.0'}

# Fetch the live transcript. This runs once at startup, so every failure
# (network error, HTTP error status, unexpected markup) deliberately degrades
# to the bundled excerpt instead of stopping the app.
try:
    response = requests.get(url, headers=headers, timeout=8)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    script_div = soup.find('div', class_='full-script')
    live_text = script_div.get_text(separator="\n") if script_div else ""
    if live_text.strip():
        raw_text = live_text
        print("✅ Live script loaded")
    else:
        # The page was fetched but the transcript container is missing or
        # empty: report that honestly rather than claiming a live load.
        raw_text = fallback_script
        print("⚠️ Script container missing or empty; using fallback script")
except Exception as e:
    print(f"⚠️ Scrape blocked: {e}")
    raw_text = fallback_script


print("🧠 Processing unstructured data...")

# A dialogue line is an upper-case speaker label, a colon, then the spoken text.
dialogue_pattern = re.compile(r"^([A-Z\s]+):\s*(.*)")


def _parse_dialogue(script_lines):
    """Return one {"Character", "Dialogue"} record per `SPEAKER: text` line.

    Lines are stripped first; blank lines and lines that do not match
    ``dialogue_pattern`` (e.g. stage directions) are skipped.
    """
    records = []
    for raw_line in script_lines:
        match = dialogue_pattern.match(raw_line.strip())
        if match:
            records.append({
                "Character": match.group(1).strip(),
                "Dialogue": match.group(2).strip()
            })
    return records


lines = raw_text.split("\n")
structured_data = _parse_dialogue(lines)

if not structured_data:
    # Text without `SPEAKER:` labels yields no records; an empty DataFrame
    # would then have no "Character" column and the next step would raise
    # KeyError. Re-parse the bundled excerpt so startup can continue.
    print("⚠️ No dialogue lines found; using fallback script")
    lines = fallback_script.split("\n")
    structured_data = _parse_dialogue(lines)

# Explicit columns keep the frame well-formed even if it ends up empty.
df = pd.DataFrame(structured_data, columns=["Character", "Dialogue"])
df["Full_Line"] = df["Character"] + ": " + df["Dialogue"]

# Each chunk is a line plus `window_size` neighbours on either side, so a
# retrieved line comes with its immediate conversational context.
window_size = 1
full_lines = df["Full_Line"].tolist()
context_chunks = [
    {
        "context_block": "\n".join(
            full_lines[max(0, i - window_size):i + window_size + 1]
        )
    }
    for i in range(len(full_lines))
]

ai_df = pd.DataFrame(context_chunks, columns=["context_block"])


print("⚡ Loading embedding model...")

# Sentence-embedding model; search_script reuses it to embed each query.
model = SentenceTransformer("all-MiniLM-L6-v2")

# Embed every context block once at startup so queries only need a lookup.
corpus_texts = ai_df["context_block"].tolist()
vector_database = model.encode(corpus_texts)

# Chat model client; the token is read from the HF_TOKEN environment
# variable and is None when that variable is not set.
hf_token = os.getenv("HF_TOKEN")
client = InferenceClient(
    model="meta-llama/Meta-Llama-3-8B-Instruct",
    token=hf_token,
)


def search_script(user_query, history):
    """Answer *user_query* from the indexed script and record the exchange.

    The query is embedded with the module-level ``model``, the two closest
    context blocks are looked up via ``vector_database`` / ``ai_df``, and the
    question plus those excerpts are sent to the chat model behind ``client``.

    Args:
        user_query: Text typed by the user. Empty or whitespace-only input
            is ignored.
        history: Chat history as a list of ``{"role", "content"}`` dicts. It
            is extended in place; ``None`` is treated as an empty history.

    Returns:
        A ``(history, "")`` tuple: the updated history and an empty string,
        which the UI wiring sends back to the textbox to clear it.

    Exceptions raised during retrieval or generation are not propagated;
    they are reported inside the assistant message instead.
    """
    if history is None:
        history = []

    if not user_query or not user_query.strip():
        return history, ""

    # Placeholder shown only when retrieval itself produced nothing; it is
    # replaced as soon as excerpts are available, so a later failure of the
    # LLM call does not discard context that was retrieved successfully.
    combined_context = "No context retrieved."

    try:
        query_vector = model.encode(user_query)
        hits = util.semantic_search(query_vector, vector_database, top_k=2)

        retrieved_contexts = [
            ai_df["context_block"].iloc[hit["corpus_id"]]
            for hit in hits[0]
        ]
        if retrieved_contexts:
            combined_context = "\n\n━━━━━━━━━━━━━━\n\n".join(retrieved_contexts)

        messages = [
            {
                "role": "system",
                "content": "You are an expert TV Script Analyst AI.\n\nRules:\n- Answer ONLY from provided excerpts\n- Never invent lore\n- Explain naturally\n- Be concise and smart"
            },
            {
                "role": "user",
                "content": f"Question:\n{user_query}\n\nScript Excerpts:\n{combined_context}"
            }
        ]

        response = client.chat.completions.create(
            messages=messages,
            max_tokens=220,
            temperature=0.25
        )

        # Guard against a missing message body instead of relying on the
        # broad handler below to catch the AttributeError.
        ai_answer = (response.choices[0].message.content or "").strip()

    except Exception as e:
        ai_answer = f"❌ Error: {str(e)}"

    formatted_response = f"## 🤖 AI Analysis\n\n{ai_answer}\n\n---\n\n### 📚 Retrieved Script Context\n\n```text\n{combined_context}\n```\n"

    history.append({"role": "user", "content": user_query})
    history.append({"role": "assistant", "content": formatted_response})

    return history, ""


# Stylesheet handed to gr.Blocks(css=...): dark gradient background, neon
# title/subtitle classes used by the header HTML, the translucent
# `.glass-chat` panel applied to the chatbot, restyled textarea/buttons, and
# a rule that hides the page footer.
custom_css = """
@import url('https://fonts.googleapis.com/css2?family=Orbitron:wght@500;700&family=Inter:wght@300;400;500;600&display=swap');
body { background: #050816 !important; }
.gradio-container { background: radial-gradient(circle at top left, #101935 0%, #050816 60%, #02030a 100%) !important; color: white !important; font-family: 'Inter', sans-serif; }
.main-title { text-align: center; font-size: 42px; font-family: 'Orbitron', sans-serif; font-weight: 700; background: linear-gradient(90deg, #00eaff, #6f8cff, #00eaff); -webkit-background-clip: text; -webkit-text-fill-color: transparent; text-shadow: 0 0 10px #00eaff, 0 0 20px #00eaff; }
.subtitle { text-align: center; color: #9ab0ff; font-size: 17px; margin-top: -10px; margin-bottom: 25px; }
.glass-chat { border-radius: 24px !important; background: rgba(255,255,255,0.06) !important; border: 1px solid rgba(255,255,255,0.08) !important; backdrop-filter: blur(20px); box-shadow: 0 0 35px rgba(0,255,255,0.12); }
textarea { background: rgba(255,255,255,0.06) !important; color: white !important; border-radius: 18px !important; border: 1px solid rgba(0,255,255,0.2) !important; box-shadow: 0 0 15px rgba(0,255,255,0.1); }
button { background: linear-gradient(135deg, #00d9ff, #6b63ff) !important; border: none !important; border-radius: 16px !important; color: white !important; font-weight: bold !important; box-shadow: 0 0 20px rgba(0,217,255,0.5) !important; transition: all 0.3s ease !important; }
button:hover { transform: scale(1.04); }
footer { display: none !important; }
"""

# Assemble the interface: title banner, chat window, input row, example
# prompts, then wire both submit paths to search_script.
with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
    banner_markup = """
    <div class="main-title">AI Narrative Intelligence Engine</div>
    <div class="subtitle">Semantic Retrieval + LLM Reasoning on Unstructured TV Dialogue</div>
    """
    gr.HTML(banner_markup)

    chatbot = gr.Chatbot(type="messages", height=600, elem_classes="glass-chat")

    with gr.Row():
        user_input = gr.Textbox(
            placeholder="Ask anything about the script...",
            lines=2,
            scale=8,
        )
        send_btn = gr.Button("🚀 Analyze", scale=1)

    sample_questions = [
        "Who is Ranko Zamani?",
        "Why does Reddington want Elizabeth?",
        "What is the blacklist?",
        "What is Reddington offering?",
    ]
    # gr.Examples receives one single-item row per sample question.
    gr.Examples(
        examples=[[question] for question in sample_questions],
        inputs=user_input,
    )

    # The button click and the textbox submit event run the same handler,
    # which returns the new history and an empty string for the textbox.
    for register_event in (send_btn.click, user_input.submit):
        register_event(
            fn=search_script,
            inputs=[user_input, chatbot],
            outputs=[chatbot, user_input],
        )

demo.launch()