# Atulkumar001's picture
# Update app.py
# 36ca333 verified
# Raw
# History Blame Contribute Delete
# 6.76 kB
import pandas as pd
import requests
from bs4 import BeautifulSoup
import re
from sentence_transformers import SentenceTransformer, util
from huggingface_hub import InferenceClient
import gradio as gr
import os
# Startup banner so the application logs show that initialisation has begun.
print("🚀 Loading Script Intelligence Engine...")

# Short hand-written excerpt in "CHARACTER: dialogue" form. It is used
# whenever the live transcript cannot be downloaded or located on the page,
# so the rest of the pipeline always has some text to work with.
fallback_script = """
[ Scene starts in FBI facility ]
RESSLER: He’s demanding to speak with you.
ELIZABETH: Me? Why me? I don't know him.
RESSLER: We don't know why. But we need to hear what he has to say.
[ Elizabeth walks into the interrogation room ]
REDDINGTON: You got rid of your highlights. You look much less... Baltimore.
ELIZABETH: You asked to see me. Why?
REDDINGTON: I'm a criminal, Lizzie. You're a profiler. You tell me.
ELIZABETH: I think you're bored. I think you're using us.
REDDINGTON: I'm going to make you famous. I have a list.
REDDINGTON: A blacklist of the worst criminals in the world.
REDDINGTON: Starting with Ranko Zamani.
REDDINGTON: He is in town, Lizzie.
REDDINGTON: And he plans to kidnap the general's daughter.
"""

url = "https://subslikescript.com/series/The_Blacklist-2741602/season-1/episode-1-Pilot"
# A browser-like User-Agent is sent with the request.
headers = {'User-Agent': 'Mozilla/5.0'}

# Best-effort download: any failure (network, HTTP status, parsing) falls
# back to the bundled excerpt instead of aborting startup.
try:
    response = requests.get(url, headers=headers, timeout=8)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    script_div = soup.find('div', class_='full-script')
    if script_div is None:
        # The page loaded but the transcript container is absent; report
        # that honestly rather than claiming the live script was loaded.
        print("⚠️ Script container not found on page; using fallback script")
        raw_text = fallback_script
    else:
        raw_text = script_div.get_text(separator="\n")
        print("✅ Live script loaded")
except Exception as e:
    print(f"⚠️ Scrape blocked: {e}")
    raw_text = fallback_script
print("🧠 Processing unstructured data...")

# A dialogue line is an upper-case speaker name (letters/whitespace only),
# a colon, then the spoken text, e.g. "REDDINGTON: I have a list."
dialogue_pattern = re.compile(r"^([A-Z\s]+):\s*(.*)")


def _parse_dialogue(script_lines):
    """Return one {"Character", "Dialogue"} dict per line matching dialogue_pattern.

    Blank lines and lines that do not look like "NAME: text" (for example
    stage directions in square brackets) are skipped.
    """
    rows = []
    for candidate in script_lines:
        candidate = candidate.strip()
        if not candidate:
            continue
        match = dialogue_pattern.match(candidate)
        if match:
            rows.append({
                "Character": match.group(1).strip(),
                "Dialogue": match.group(2).strip()
            })
    return rows


lines = raw_text.split("\n")
structured_data = _parse_dialogue(lines)

if not structured_data:
    # The downloaded text contained no "NAME: text" lines. Without this
    # guard the DataFrame below would have no "Character" column and the
    # Full_Line computation would raise KeyError during startup.
    print("⚠️ No dialogue lines recognised; using fallback script")
    lines = fallback_script.split("\n")
    structured_data = _parse_dialogue(lines)

# Explicit columns keep the frame's schema fixed regardless of row count.
df = pd.DataFrame(structured_data, columns=["Character", "Dialogue"])
df["Full_Line"] = df["Character"] + ": " + df["Dialogue"]
# Number of neighbouring dialogue lines included on each side of a line.
window_size = 1

# Materialise the column once so each window is a plain list slice; slices
# clip at the sequence end, and max() clips at the start.
full_lines = df["Full_Line"].tolist()

# One chunk per dialogue line: the line itself plus its neighbours, joined
# with newlines, so each retrievable unit carries some surrounding context.
context_chunks = [
    {
        "context_block": "\n".join(
            full_lines[max(0, position - window_size):position + window_size + 1]
        )
    }
    for position in range(len(full_lines))
]

ai_df = pd.DataFrame(context_chunks)
print("⚡ Loading embedding model...")
model = SentenceTransformer("all-MiniLM-L6-v2")

# Embed every context block once at startup; search_script compares each
# incoming query embedding against this precomputed collection.
vector_database = model.encode(ai_df["context_block"].tolist())

# The access token is read from the environment so it never lives in code.
hf_token = os.environ.get("HF_TOKEN")
if not hf_token:
    # Surface the misconfiguration at startup instead of only at the first
    # chat request.
    print("⚠️ HF_TOKEN is not set; inference requests may fail if no other credentials are available")
client = InferenceClient(model="meta-llama/Meta-Llama-3-8B-Instruct", token=hf_token)
def search_script(user_query, history):
    """Answer a question about the script and append the exchange to the chat.

    The query is embedded with the module-level ``model``, the two closest
    context blocks are looked up in ``vector_database`` / ``ai_df``, and the
    question plus those excerpts are sent to the module-level ``client``.

    Args:
        user_query: Text typed by the user. ``None`` or whitespace-only
            input is treated as "nothing to do".
        history: Chat history as a list of ``{"role", "content"}`` dicts.
            ``None`` is treated as an empty history. The list is extended
            in place.

    Returns:
        A ``(history, "")`` tuple: the updated history and an empty string
        that clears the input textbox.
    """
    if history is None:
        history = []
    if not user_query or not user_query.strip():
        return history, ""

    # Set before the try block so that context retrieved successfully is
    # still shown when only the later LLM call fails.
    combined_context = "No context retrieved."
    try:
        query_vector = model.encode(user_query)
        hits = util.semantic_search(query_vector, vector_database, top_k=2)
        retrieved_contexts = [
            ai_df["context_block"].iloc[hit["corpus_id"]]
            for hit in hits[0]
        ]
        combined_context = "\n\n━━━━━━━━━━━━━━\n\n".join(retrieved_contexts)

        messages = [
            {
                "role": "system",
                "content": "You are an expert TV Script Analyst AI.\n\nRules:\n- Answer ONLY from provided excerpts\n- Never invent lore\n- Explain naturally\n- Be concise and smart"
            },
            {
                "role": "user",
                "content": f"Question:\n{user_query}\n\nScript Excerpts:\n{combined_context}"
            }
        ]
        response = client.chat.completions.create(
            messages=messages,
            max_tokens=220,
            temperature=0.25
        )
        # The message content may be None; treat that as an empty answer
        # rather than failing on .strip().
        ai_answer = (response.choices[0].message.content or "").strip()
    except Exception as e:
        # Top-level UI boundary: show the failure in the chat instead of
        # letting the request crash.
        ai_answer = f"❌ Error: {e}"

    formatted_response = f"## 🤖 AI Analysis\n\n{ai_answer}\n\n---\n\n### 📚 Retrieved Script Context\n\n```text\n{combined_context}\n```\n"
    history.append({"role": "user", "content": user_query})
    history.append({"role": "assistant", "content": formatted_response})
    return history, ""
# Stylesheet handed to gr.Blocks(css=...). It imports the Orbitron and Inter
# web fonts, applies a dark gradient background, defines the .main-title,
# .subtitle and .glass-chat classes used by the UI below, restyles every
# textarea and button, and hides the page footer.
custom_css = """
@import url('https://fonts.googleapis.com/css2?family=Orbitron:wght@500;700&family=Inter:wght@300;400;500;600&display=swap');
body { background: #050816 !important; }
.gradio-container { background: radial-gradient(circle at top left, #101935 0%, #050816 60%, #02030a 100%) !important; color: white !important; font-family: 'Inter', sans-serif; }
.main-title { text-align: center; font-size: 42px; font-family: 'Orbitron', sans-serif; font-weight: 700; background: linear-gradient(90deg, #00eaff, #6f8cff, #00eaff); -webkit-background-clip: text; -webkit-text-fill-color: transparent; text-shadow: 0 0 10px #00eaff, 0 0 20px #00eaff; }
.subtitle { text-align: center; color: #9ab0ff; font-size: 17px; margin-top: -10px; margin-bottom: 25px; }
.glass-chat { border-radius: 24px !important; background: rgba(255,255,255,0.06) !important; border: 1px solid rgba(255,255,255,0.08) !important; backdrop-filter: blur(20px); box-shadow: 0 0 35px rgba(0,255,255,0.12); }
textarea { background: rgba(255,255,255,0.06) !important; color: white !important; border-radius: 18px !important; border: 1px solid rgba(0,255,255,0.2) !important; box-shadow: 0 0 15px rgba(0,255,255,0.1); }
button { background: linear-gradient(135deg, #00d9ff, #6b63ff) !important; border: none !important; border-radius: 16px !important; color: white !important; font-weight: bold !important; box-shadow: 0 0 20px rgba(0,217,255,0.5) !important; transition: all 0.3s ease !important; }
button:hover { transform: scale(1.04); }
footer { display: none !important; }
"""
# Chat UI: a title banner, the conversation pane, a question box with a
# send button, and a few example questions that fill the question box.
with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
    gr.HTML("""
<div class="main-title">AI Narrative Intelligence Engine</div>
<div class="subtitle">Semantic Retrieval + LLM Reasoning on Unstructured TV Dialogue</div>
""")

    # type="messages" matches the {"role", "content"} dicts that
    # search_script appends to the history.
    chatbot = gr.Chatbot(type="messages", height=600, elem_classes="glass-chat")

    with gr.Row():
        user_input = gr.Textbox(placeholder="Ask anything about the script...", lines=2, scale=8)
        send_btn = gr.Button("🚀 Analyze", scale=1)

    gr.Examples(
        examples=[
            ["Who is Ranko Zamani?"],
            ["Why does Reddington want Elizabeth?"],
            ["What is the blacklist?"],
            ["What is Reddington offering?"]
        ],
        inputs=user_input
    )

    # Clicking the button and submitting the textbox run the same handler;
    # it returns the updated history and an empty string for the textbox.
    for trigger in (send_btn.click, user_input.submit):
        trigger(fn=search_script, inputs=[user_input, chatbot], outputs=[chatbot, user_input])

demo.launch()