Spaces:

BhavyaSamhithaMallineni
/

FilmScriptSummary

Runtime error

App Files Files Community

BhavyaSamhithaMallineni committed on Jun 10, 2025

Commit

2575aee

verified ·

1 Parent(s): ec4d795

Update app.py

Browse files

Files changed (1) hide show

app.py +138 -3

app.py CHANGED Viewed

@@ -1,4 +1,139 @@
-from utils import chunk_script
-chunks = chunk_script(user_input)
-summary = summarizer(chunks[0])[0]['summary_text']

+import fitz  # PyMuPDF
+import gradio as gr
+import json
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from groq import Groq
+import os
+# Load API key. The error message tells users to set 'GROQ_API_KEY', so that
+# name is honoured first; the legacy secret name 'Groq' is still accepted so
+# deployments configured with it keep working.
+API_KEY = os.getenv('GROQ_API_KEY') or os.getenv('Groq')
+if not API_KEY:
+    raise ValueError(
+        "API Key is missing! Set the environment variable 'GROQ_API_KEY' "
+        "(the legacy name 'Groq' is also accepted)."
+    )
+# Initialize Groq Client
+client = Groq(api_key=API_KEY)
+# Prompt Template: one instruction per line; the {json_structure} and {text}
+# placeholders are substituted with str.format before the request is sent.
+PROMPT_TEMPLATE = "\n".join((
+    "You are an expert screenplay analyst. Convert the following film script text into the JSON structure below:",
+    "{json_structure}",
+    "Script Text:",
+    "{text}",
+    "Provide only the JSON response.",
+))
+# Define the JSON structure to be extracted, composed from its nested parts.
+# Shape of one line of dialogue inside a scene.
+_DIALOGUE_TEMPLATE = {"character": "", "dialogue_text": "", "tone": ""}
+# Shape of one scene; list-valued fields start empty.
+_SCENE_TEMPLATE = {
+    "scene_heading": "",
+    "location": "",
+    "time_of_day": "",
+    "characters": [],
+    "emotions": [],
+    "summary": "",
+    "dialogues": [_DIALOGUE_TEMPLATE],
+}
+# Top-level structure: scenes, the overall emotional arc and the story beats.
+JSON_STRUCTURE = {
+    "scenes": [_SCENE_TEMPLATE],
+    "overall_emotional_arc": [],
+    "story_beats": dict.fromkeys(
+        ("setup", "inciting_incident", "climax", "resolution"), ""
+    ),
+}
+# Function to extract text from PDF
+def extract_text_from_pdf(pdf_file):
+    """Return the plain text of every page of an uploaded PDF.
+
+    Args:
+        pdf_file: Either a filesystem path (``str`` / ``os.PathLike``) or an
+            object that exposes the path as ``.name``. Both are accepted
+            because the upload component may hand over either form.
+
+    Returns:
+        The text of all pages joined by newlines, with surrounding whitespace
+        stripped. On any failure a string starting with
+        ``"Error reading PDF:"`` is returned instead of raising; callers
+        detect failure by that prefix.
+    """
+    if pdf_file is None:
+        return "Error reading PDF: no file was provided"
+    try:
+        if isinstance(pdf_file, (str, os.PathLike)):
+            path = pdf_file
+        else:
+            path = pdf_file.name
+        with open(path, 'rb') as f:
+            data = f.read()
+        # The context manager closes the document even if a page fails to parse
+        with fitz.open(stream=data, filetype="pdf") as doc:
+            pages = [page.get_text() for page in doc]
+    except Exception as e:
+        return f"Error reading PDF: {e}"
+    return "\n".join(pages).strip()
+# Function to split text into chunks
+def split_text_into_chunks(text, chunk_size=2000, chunk_overlap=100):
+    """Split text into overlapping chunks with RecursiveCharacterTextSplitter.
+
+    Args:
+        text: The full script text to split.
+        chunk_size: Maximum size of each chunk, passed to the splitter.
+        chunk_overlap: Overlap between consecutive chunks, passed to the
+            splitter. Defaults to 100, the value that used to be hard-coded,
+            so existing callers are unaffected.
+
+    Returns:
+        The list of chunks produced by ``split_text``.
+    """
+    splitter = RecursiveCharacterTextSplitter(
+        chunk_size=chunk_size,
+        chunk_overlap=chunk_overlap,
+    )
+    return splitter.split_text(text)
+# Function to call Groq API
+def _strip_code_fence(raw):
+    """Remove a surrounding Markdown code fence (``` or ```json) from raw."""
+    cleaned = raw.strip()
+    if len(cleaned) >= 6 and cleaned.startswith("```") and cleaned.endswith("```"):
+        cleaned = cleaned[3:-3]
+        # Drop the optional "json" language tag that follows the opening fence
+        if cleaned[:4].lower() == "json":
+            cleaned = cleaned[4:]
+    return cleaned.strip()
+def call_llm_api(text):
+    """Ask the Groq chat model to convert script text into the JSON structure.
+
+    Args:
+        text: A chunk of screenplay text inserted into ``PROMPT_TEMPLATE``.
+
+    Returns:
+        The parsed JSON object (a dict) returned by the model, or
+        ``{"error": "..."}`` when the request fails, the reply is not valid
+        JSON, or the reply is valid JSON but not an object. Never raises for
+        API or parsing failures.
+    """
+    prompt = PROMPT_TEMPLATE.format(json_structure=json.dumps(JSON_STRUCTURE, indent=2), text=text)
+    try:
+        response = client.chat.completions.create(
+            messages=[{"role": "user", "content": prompt}],
+            # "llama-3-3-70b-vision" is not a Groq model ID, so requests were rejected
+            model="llama-3.3-70b-versatile",
+        )
+        # content may be None; treat that as an empty (unparseable) reply
+        raw_content = response.choices[0].message.content or ""
+        parsed = json.loads(_strip_code_fence(raw_content))
+    except Exception as e:
+        return {"error": f"API call failed: {e}"}
+    # Callers index the result by key, so reject arrays and scalars explicitly
+    if not isinstance(parsed, dict):
+        return {"error": f"API call failed: expected a JSON object, got {type(parsed).__name__}"}
+    return parsed
+# Function to merge JSON chunks
+def merge_json_chunks(chunks):
+    """Run every text chunk through the LLM and merge the per-chunk results.
+
+    Args:
+        chunks: Iterable of script text chunks; each is passed to
+            ``call_llm_api``.
+
+    Returns:
+        A dict with the top-level keys of ``JSON_STRUCTURE``. ``scenes`` and
+        ``overall_emotional_arc`` are concatenated across chunks in order, and
+        each ``story_beats`` entry holds the first non-empty value seen. When
+        one or more chunks fail, their messages are listed under an extra
+        ``chunk_errors`` key instead of being dropped silently.
+    """
+    # Build a fresh accumulator. A shallow JSON_STRUCTURE.copy() shares the
+    # nested "story_beats" dict with the module-level template, so beats from
+    # one upload would leak into later prompts and later results.
+    combined_result = {
+        "scenes": [],
+        "overall_emotional_arc": [],
+        "story_beats": dict.fromkeys(JSON_STRUCTURE["story_beats"], ""),
+    }
+    chunk_errors = []
+    for index, chunk in enumerate(chunks):
+        result = call_llm_api(chunk)
+        # The model may answer with valid JSON that is not an object
+        if not isinstance(result, dict):
+            chunk_errors.append(f"chunk {index}: unexpected response type {type(result).__name__}")
+            continue
+        if "error" in result:
+            chunk_errors.append(f"chunk {index}: {result['error']}")
+            continue
+        # Merge scenes and emotional arc, ignoring malformed (non-list) values
+        for key in ("scenes", "overall_emotional_arc"):
+            value = result.get(key)
+            if isinstance(value, list):
+                combined_result[key].extend(value)
+        # Merge story beats only once (first time we encounter valid values)
+        beats = result.get("story_beats")
+        if isinstance(beats, dict):
+            for beat, current in combined_result["story_beats"].items():
+                if not current and beats.get(beat):
+                    combined_result["story_beats"][beat] = beats[beat]
+    if chunk_errors:
+        combined_result["chunk_errors"] = chunk_errors
+    return combined_result
+# Gradio interface function
+def gradio_interface(file):
+    """Turn an uploaded screenplay PDF into the structured JSON result.
+
+    Args:
+        file: The upload value supplied by the Gradio file input; it is passed
+            unchanged to ``extract_text_from_pdf``.
+
+    Returns:
+        The merged extraction dict from ``merge_json_chunks``, or
+        ``{"error": "..."}`` when the PDF cannot be read or contains no text.
+    """
+    pdf_text = extract_text_from_pdf(file)
+    # Match the exact failure prefix: a screenplay may legitimately begin with "Error"
+    if pdf_text.startswith("Error reading PDF:"):
+        return {"error": pdf_text}
+    # Nothing to analyse (e.g. an image-only PDF); say so instead of returning an empty structure
+    if not pdf_text:
+        return {"error": "No extractable text found in the PDF."}
+    chunks = split_text_into_chunks(pdf_text)
+    return merge_json_chunks(chunks)
+# Gradio UI: a single PDF upload in, the extracted structure out as JSON.
+_script_upload = gr.File(label="Upload Film Script PDF")
+iface = gr.Interface(
+    fn=gradio_interface,
+    inputs=_script_upload,
+    outputs="json",
+    title="ScriptWhisper - Screenplay Structure & Emotion Extractor",
+    description="Upload a screenplay PDF to extract scene structure, emotional arc, and story beats.",
+)
+# Start the web app only when this file is executed as a script
+if __name__ == "__main__":
+    iface.launch()