BhavyaSamhithaMallineni committed on
Commit
2575aee
·
verified ·
1 Parent(s): ec4d795

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +138 -3
app.py CHANGED
@@ -1,4 +1,139 @@
1
- from utils import chunk_script
 
 
 
 
 
2
 
3
- chunks = chunk_script(user_input)
4
- summary = summarizer(chunks[0])[0]['summary_text']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import fitz # PyMuPDF
2
+ import gradio as gr
3
+ import json
4
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
5
+ from groq import Groq
6
+ import os
7
 
8
+ # Load API key
9
# The key is read from 'Groq' (the variable this app has always used) and, as a
# fallback, from 'GROQ_API_KEY' (the name the error message has always
# advertised), so following the error message actually fixes the problem.
API_KEY = os.getenv('Groq') or os.getenv('GROQ_API_KEY')
if not API_KEY:
    # Fail at import time: nothing below can work without credentials.
    raise ValueError(
        "API Key is missing! Set the environment variable 'Groq' or 'GROQ_API_KEY'."
    )

# Initialize Groq Client
client = Groq(api_key=API_KEY)
15
+
16
+ # Prompt Template
17
# Prompt sent to the LLM for every chunk. It has two ``str.format`` fields:
# ``{json_structure}`` (the target schema, as JSON text) and ``{text}`` (the
# chunk of screenplay to analyse).
PROMPT_TEMPLATE = (
    "You are an expert screenplay analyst. Convert the following film script text into the JSON structure below:\n"
    "{json_structure}\n"
    "\n"
    "Script Text:\n"
    "{text}\n"
    "\n"
    "Provide only the JSON response."
)
26
+
27
+ # Define the JSON structure to be extracted
28
# Empty template of the analysis the LLM is asked to fill in. It is serialised
# into the prompt and also used as the skeleton of the merged result, so key
# order here is the key order the user sees.
JSON_STRUCTURE = dict(
    # One entry per scene found in the script text.
    scenes=[
        dict(
            scene_heading="",
            location="",
            time_of_day="",
            characters=[],
            emotions=[],
            summary="",
            # One entry per spoken line in the scene.
            dialogues=[
                dict(character="", dialogue_text="", tone=""),
            ],
        ),
    ],
    overall_emotional_arc=[],
    story_beats=dict(
        setup="",
        inciting_incident="",
        climax="",
        resolution="",
    ),
)
54
+
55
+ # Function to extract text from PDF
56
def extract_text_from_pdf(pdf_file):
    """Extract the plain text of every page of a PDF.

    Args:
        pdf_file: Either a filesystem path (``str`` / ``os.PathLike``) or an
            object exposing the path as ``.name`` (such as a temp-file
            wrapper). NOTE(review): which of the two the Gradio ``File``
            input passes depends on the installed Gradio version and the
            component's ``type`` setting; both are accepted here.

    Returns:
        The text of all pages, each page followed by a newline, with leading
        and trailing whitespace stripped. On any failure the function does
        not raise; it returns a string starting with "Error reading PDF:".
    """
    try:
        if isinstance(pdf_file, (str, os.PathLike)):
            path = pdf_file
        else:
            path = pdf_file.name

        with open(path, 'rb') as f:
            data = f.read()

        doc = fitz.open(stream=data, filetype="pdf")
        try:
            pages = [page.get_text() + "\n" for page in doc]
        finally:
            # Release the document even if a page fails to render.
            doc.close()
    except Exception as e:
        # Callers detect failure by this exact prefix; keep it stable.
        return f"Error reading PDF: {e}"
    return "".join(pages).strip()
66
+
67
+ # Function to split text into chunks
68
def split_text_into_chunks(text, chunk_size=2000, chunk_overlap=100):
    """Split script text into overlapping chunks small enough for one LLM call.

    Args:
        text: The full text to split.
        chunk_size: Maximum size of each chunk, as measured by the splitter
            (characters with its default length function).
        chunk_overlap: Amount of text shared between consecutive chunks so
            that content cut at a boundary appears in both. Should not exceed
            ``chunk_size``.

    Returns:
        The list of chunk strings produced by
        ``RecursiveCharacterTextSplitter.split_text``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_text(text)
71
+
72
+ # Function to call Groq API
73
def call_llm_api(text, model="llama-3.3-70b-versatile"):
    """Ask the Groq chat model to convert one chunk of script text into JSON.

    Args:
        text: The chunk of screenplay text to analyse.
        model: Groq model id to use for the completion.

    Returns:
        The parsed JSON object (a ``dict``) on success. On any failure
        (request error, unparsable reply, or a reply that is valid JSON but
        not an object) a dict of the form ``{"error": "API call failed: ..."}``
        is returned instead of raising.
    """
    prompt = PROMPT_TEMPLATE.format(json_structure=json.dumps(JSON_STRUCTURE, indent=2), text=text)

    try:
        response = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model=model,
        )

        raw_content = response.choices[0].message.content.strip()

        # Models often wrap the reply in a Markdown code fence, with or
        # without a "json" language tag; unwrap it before parsing.
        if raw_content.startswith("```") and raw_content.endswith("```"):
            raw_content = raw_content[3:-3].strip()
            # A JSON document never begins with the letters "json", so this
            # can only be the fence's language tag.
            if raw_content[:4].lower() == "json":
                raw_content = raw_content[4:].strip()

        parsed = json.loads(raw_content)
    except Exception as e:
        # Best-effort boundary: one bad chunk must not abort the whole run.
        return {"error": f"API call failed: {e}"}

    if not isinstance(parsed, dict):
        # Callers index the result by key; a bare list/string/number is unusable.
        return {"error": f"API call failed: expected a JSON object, got {type(parsed).__name__}"}
    return parsed
91
+
92
+ # Function to merge JSON chunks
93
def merge_json_chunks(chunks):
    """Analyse each text chunk with the LLM and merge the per-chunk results.

    Args:
        chunks: Iterable of text chunks; each is sent to ``call_llm_api``.

    Returns:
        A dict shaped like ``JSON_STRUCTURE``: ``scenes`` and
        ``overall_emotional_arc`` are the concatenation of every successful
        chunk's lists, and each ``story_beats`` entry holds the first
        non-empty value any chunk produced. Chunks whose call failed are
        skipped.
    """
    # Build fresh containers for every call. A shallow copy of the template
    # would share its nested "story_beats" dict, so filling beats here would
    # leak into the template, later requests, and later prompts.
    combined_result = {
        **JSON_STRUCTURE,
        "scenes": [],
        "overall_emotional_arc": [],
        "story_beats": dict(JSON_STRUCTURE["story_beats"]),
    }
    story_beats = combined_result["story_beats"]

    for chunk in chunks:
        result = call_llm_api(chunk)
        # Best-effort: a failed or malformed chunk is skipped, not fatal.
        if not isinstance(result, dict) or "error" in result:
            continue

        # Merge scenes (only when the model really returned a list).
        scenes = result.get("scenes")
        if isinstance(scenes, list):
            combined_result["scenes"].extend(scenes)

        # Merge emotional arc.
        arc = result.get("overall_emotional_arc")
        if isinstance(arc, list):
            combined_result["overall_emotional_arc"].extend(arc)

        # Merge story beats only once (first time we encounter valid values).
        chunk_beats = result.get("story_beats")
        if not isinstance(chunk_beats, dict):
            continue
        for beat in story_beats:
            if not story_beats[beat] and chunk_beats.get(beat):
                story_beats[beat] = chunk_beats[beat]

    return combined_result
117
+
118
+ # Gradio interface function
119
def gradio_interface(file):
    """Gradio callback: turn an uploaded screenplay PDF into structured JSON.

    Args:
        file: The value Gradio passes for the file input, or ``None`` when
            nothing was uploaded.

    Returns:
        The merged analysis dict on success, or ``{"error": message}`` when
        no file was given, the PDF could not be read, or it contained no
        extractable text.
    """
    if file is None:
        return {"error": "No file uploaded."}

    pdf_text = extract_text_from_pdf(file)
    # Match the extractor's full failure prefix; a bare "Error" check would
    # reject any screenplay whose text happens to begin with that word.
    if pdf_text.startswith("Error reading PDF:"):
        return {"error": pdf_text}
    if not pdf_text:
        # E.g. a scanned PDF with no text layer: avoid returning an empty
        # skeleton that looks like a successful analysis.
        return {"error": "No extractable text found in the PDF."}

    chunks = split_text_into_chunks(pdf_text)
    return merge_json_chunks(chunks)
127
+
128
+ # Gradio UI
129
# Single-input web form: a PDF upload goes in, the analysis comes back as JSON.
iface = gr.Interface(
    gradio_interface,
    gr.File(label="Upload Film Script PDF"),
    "json",
    title="ScriptWhisper - Screenplay Structure & Emotion Extractor",
    description="Upload a screenplay PDF to extract scene structure, emotional arc, and story beats.",
)

# Launch the app only when run as a script, not when imported.
if __name__ == "__main__":
    iface.launch()