junaid17 committed on
Commit
d8b0675
·
verified ·
1 Parent(s): d8504c1

Update summarise_bot.py

Browse files
Files changed (1) hide show
  1. summarise_bot.py +384 -384
summarise_bot.py CHANGED
@@ -1,385 +1,385 @@
1
- # =========================
2
- # IMPORTS
3
- # =========================
4
- from langgraph.graph import StateGraph, START, END
5
- from typing import TypedDict
6
- from langchain_core.messages import HumanMessage
7
- from langchain_openai import ChatOpenAI
8
- from langchain_community.tools.tavily_search import TavilySearchResults
9
- from langchain_core.tools import tool
10
- import json
11
- from dotenv import load_dotenv
12
-
13
- load_dotenv()
14
-
15
-
16
- # =========================
17
- # TAVILY TOOL
18
- # =========================
19
- @tool
20
- def tavily_search(query: str) -> dict:
21
- """
22
- Perform a real-time web search using Tavily.
23
- """
24
- try:
25
- search = TavilySearchResults(max_results=2)
26
- results = search.run(query)
27
- return {"query": query, "results": results}
28
- except Exception as e:
29
- return {"error": str(e)}
30
-
31
-
32
- # =========================
33
- # LLM
34
- # =========================
35
- llm = ChatOpenAI(
36
- model="gpt-4.1-nano",
37
- temperature=0.4,
38
- streaming=True
39
- )
40
-
41
-
42
- # =========================
43
- # STATE
44
- # =========================
45
- class MovieState(TypedDict, total=False):
46
- title: str
47
- overview: str
48
- web_context: str
49
- key_plot_points: str
50
- iconic_moments: str
51
- themes: str
52
- interesting_facts: str
53
- songs: str
54
- trailer: str
55
- summary: str
56
-
57
-
58
- # =========================
59
- # NODE: FETCH WEB CONTEXT
60
- # =========================
61
- def fetch_web_context(state: MovieState):
62
- title = state["title"]
63
-
64
- query = f"""
65
- Find reliable and up-to-date information about the movie "{title}".
66
-
67
- Focus on:
68
- - Official trailers (studio or verified YouTube channels)
69
- - Soundtrack / songs (Spotify, Apple Music, IMDb soundtrack)
70
- - Verified trivia or interesting facts
71
- - Release details and reception (optional)
72
-
73
- Prefer sources like:
74
- - IMDb
75
- - Wikipedia
76
- - Official studio websites
77
- - Verified YouTube channels
78
- - Major entertainment publications
79
-
80
- Avoid:
81
- - Fan theories
82
- - Reviews without factual info
83
- - Opinion-heavy blogs
84
- """
85
-
86
- web = tavily_search.run(query)
87
-
88
- return {
89
- "web_context": str(web)
90
- }
91
-
92
- # =========================
93
- # HELPER PROMPT RUNNER
94
- # =========================
95
- def run_llm(prompt: str) -> str:
96
- return llm.invoke(prompt).content
97
-
98
-
99
- # =========================
100
- # ANALYSIS NODES
101
- # =========================
102
- def find_key_points(state: MovieState):
103
- prompt = f"""
104
- You are a professional movie analyst.
105
-
106
- Movie title: {state['title']}
107
-
108
- Overview:
109
- {state['overview']}
110
-
111
- Verified web context (may include reviews, trivia, or plot confirmations):
112
- {state['web_context']}
113
-
114
- Task:
115
- Extract the MOST IMPORTANT plot points that define the story.
116
-
117
- Guidelines:
118
- - Focus on STORY EVENTS, not themes or opinions
119
- - Keep it chronological
120
- - Avoid unnecessary details or long explanations
121
- - Do NOT invent scenes not supported by the overview or web context
122
-
123
- Output format (strict):
124
- - Bullet list
125
- - 5–7 plot points max
126
- - Each point: 1 concise sentence
127
- """
128
- return {"key_plot_points": run_llm(prompt)}
129
-
130
-
131
- def find_iconic_moments(state: MovieState):
132
- prompt = f"""
133
- You are a film analyst identifying ICONIC moments.
134
-
135
- Movie title: {state['title']}
136
-
137
- Overview:
138
- {state['overview']}
139
-
140
- Verified web context (reviews, trivia, cultural references):
141
- {state['web_context']}
142
-
143
- Task:
144
- Identify the most ICONIC moments from the movie.
145
-
146
- Definition of iconic:
147
- - Scenes that audiences remember most
148
- - Moments often referenced in reviews, memes, or pop culture
149
- - Visually, emotionally, or narratively standout scenes
150
-
151
- Guidelines:
152
- - Do NOT summarize the full plot
153
- - Avoid repeating basic plot points
154
- - Focus on memorable SCENES or MOMENTS
155
- - Base choices on common recognition (not personal opinion)
156
-
157
- Output format (strict):
158
- - Numbered list
159
- - 4–6 iconic moments
160
- - Each item:
161
- • Scene title (short)
162
- • One-sentence explanation of why it’s iconic
163
- """
164
- return {"iconic_moments": run_llm(prompt)}
165
-
166
- def find_themes(state: MovieState):
167
- prompt = f"""
168
- You are a movie analyst focusing on THEMES.
169
-
170
- Movie title: {state['title']}
171
-
172
- Overview:
173
- {state['overview']}
174
-
175
- Verified web context (critical analysis, reviews, commentary):
176
- {state['web_context']}
177
-
178
- Task:
179
- Identify the CORE THEMES explored in the movie.
180
-
181
- Guidelines:
182
- - Themes should be CONCEPTS (not plot points or morals)
183
- - Avoid vague words like "life" or "journey" unless specific
184
- - Base themes on story events and critical interpretation
185
- - Do NOT over-explain
186
-
187
- Output format (strict):
188
- - Bullet list
189
- - 3–5 themes only
190
- - Each theme format:
191
- **Theme name** – one concise explanatory sentence
192
- """
193
- return {"themes": run_llm(prompt)}
194
-
195
- def find_interesting_facts(state: MovieState):
196
- prompt = f"""
197
- You are a movie researcher collecting VERIFIED trivia.
198
-
199
- Movie title: {state['title']}
200
-
201
- Overview:
202
- {state['overview']}
203
-
204
- Verified web context (interviews, trivia, production notes, reviews):
205
- {state['web_context']}
206
-
207
- Task:
208
- Extract interesting and lesser-known facts about the movie.
209
-
210
- Guidelines:
211
- - Facts must be BASED on the web context or widely known sources
212
- - Avoid speculation or unverified claims
213
- - Focus on production, casting, behind-the-scenes, or reception
214
- - Do NOT repeat plot points
215
-
216
- Output format (strict):
217
- - Bullet list
218
- - 4–6 facts
219
- - Each fact:
220
- • One concise sentence
221
- • Clearly factual (no opinions)
222
- """
223
- return {"interesting_facts": run_llm(prompt)}
224
-
225
- def find_songs(state: MovieState):
226
- prompt = f"""
227
- You are extracting OFFICIAL soundtrack information.
228
-
229
- Movie title: {state['title']}
230
-
231
- Verified web context (soundtrack listings, music platforms, official sources):
232
- {state['web_context']}
233
-
234
- Task:
235
- Identify the official soundtrack songs associated with this movie.
236
-
237
- Rules:
238
- - Include ONLY officially released songs (not background score unless famous)
239
- - Prefer reliable sources (Spotify, YouTube, Apple Music, IMDb soundtrack)
240
- - Do NOT guess or invent songs
241
- - Do NOT add explanations or extra text
242
-
243
- Output format (STRICT — follow exactly):
244
- - One song per line
245
- - Each line format:
246
- [song name, official link]
247
-
248
- If no reliable song information is found:
249
- - Return an empty list: []
250
- """
251
- return {"songs": run_llm(prompt)}
252
-
253
-
254
- def find_trailer(state: MovieState):
255
- prompt = f"""
256
- You are retrieving OFFICIAL movie trailer information.
257
-
258
- Movie title: {state['title']}
259
-
260
- Verified web context (official YouTube channels, studio pages, IMDb, Wikipedia):
261
- {state['web_context']}
262
-
263
- Task:
264
- Find official trailer links for this movie.
265
-
266
- Rules:
267
- - ONLY official trailers (no fan edits, reactions, reviews)
268
- - Prefer studio or verified YouTube channels
269
- - Do NOT invent or approximate links
270
- - Do NOT include commentary or descriptions
271
-
272
- Output format (STRICT — follow exactly):
273
- - One trailer per line
274
- - Each line format:
275
- [trailer name, official link]
276
-
277
- If no official trailer is found:
278
- - Return an empty list: []
279
- """
280
- return {"trailer": run_llm(prompt)}
281
-
282
-
283
- # =========================
284
- # FINAL SUMMARY
285
- # =========================
286
- def generate_summary(state: MovieState):
287
- prompt = f"""
288
- You are generating a FINAL movie summary for a frontend application.
289
-
290
- Movie title: {state['title']}
291
-
292
- Use ONLY the information provided below.
293
- Do NOT add new facts.
294
- Do NOT use markdown.
295
- Do NOT include extra text.
296
-
297
- INPUT DATA
298
- ---------
299
-
300
- KEY PLOT POINTS:
301
- {state['key_plot_points']}
302
-
303
- ICONIC MOMENTS:
304
- {state['iconic_moments']}
305
-
306
- THEMES:
307
- {state['themes']}
308
-
309
- INTERESTING FACTS:
310
- {state['interesting_facts']}
311
-
312
- SONGS:
313
- {state['songs']}
314
-
315
- TRAILERS:
316
- {state['trailer']}
317
-
318
- ---------
319
-
320
- TASK:
321
- Return a VALID JSON object that follows this schema EXACTLY.
322
-
323
- JSON SCHEMA (STRICT):
324
- {{
325
- "overview": "23 sentence high-level movie overview",
326
- "key_moments": ["moment 1", "moment 2", "moment 3"],
327
- "themes": ["theme 1", "theme 2"],
328
- "notable_facts": ["fact 1", "fact 2"],
329
- "soundtrack_highlights": ["song name 1", "song name 2"],
330
- "official_trailer": "trailer name"
331
- }}
332
- """
333
- return {"summary": run_llm(prompt)}
334
-
335
-
336
-
337
- # =========================
338
- # GRAPH
339
- # =========================
340
- graph = StateGraph(MovieState)
341
-
342
- graph.add_node("fetch_web_context", fetch_web_context)
343
- graph.add_node("find_key_points", find_key_points)
344
- graph.add_node("find_iconic_moments", find_iconic_moments)
345
- graph.add_node("find_themes", find_themes)
346
- graph.add_node("find_interesting_facts", find_interesting_facts)
347
- graph.add_node("find_songs", find_songs)
348
- graph.add_node("find_trailer", find_trailer)
349
- graph.add_node("generate_summary", generate_summary)
350
-
351
- graph.add_edge(START, "fetch_web_context")
352
-
353
- graph.add_edge("fetch_web_context", "find_key_points")
354
- graph.add_edge("fetch_web_context", "find_iconic_moments")
355
- graph.add_edge("fetch_web_context", "find_themes")
356
- graph.add_edge("fetch_web_context", "find_interesting_facts")
357
- graph.add_edge("fetch_web_context", "find_songs")
358
- graph.add_edge("fetch_web_context", "find_trailer")
359
-
360
- graph.add_edge("find_key_points", "generate_summary")
361
- graph.add_edge("find_iconic_moments", "generate_summary")
362
- graph.add_edge("find_themes", "generate_summary")
363
- graph.add_edge("find_interesting_facts", "generate_summary")
364
- graph.add_edge("find_songs", "generate_summary")
365
- graph.add_edge("find_trailer", "generate_summary")
366
-
367
- graph.add_edge("generate_summary", END)
368
-
369
- workflow = graph.compile()
370
-
371
- def summarise_movie(title: str, overview: str):
372
- result = workflow.invoke({
373
- "title": title,
374
- "overview": overview
375
- })
376
-
377
- raw_summary = result["summary"]
378
-
379
- try:
380
- return json.loads(raw_summary)
381
- except json.JSONDecodeError:
382
- raise ValueError("LLM returned invalid JSON")
383
-
384
-
385
  #print(summarise_movie("Jumanji", "Four teenagers are sucked into a magical video game..."))
 
1
+ # =========================
2
+ # IMPORTS
3
+ # =========================
4
+ from langgraph.graph import StateGraph, START, END
5
+ from typing import TypedDict
6
+ from langchain_core.messages import HumanMessage
7
+ from langchain_openai import ChatOpenAI
8
+ from langchain_community.tools.tavily_search import TavilySearchResults
9
+ from langchain_core.tools import tool
10
+ import json
11
+ from dotenv import load_dotenv
12
+
13
+ load_dotenv()
14
+
15
+
16
+ # =========================
17
+ # TAVILY TOOL
18
+ # =========================
19
@tool
def tavily_search(query: str) -> dict:
    """
    Perform a real-time web search using Tavily.
    """
    # NOTE: the docstring above is left untouched on purpose; the @tool
    # decorator is expected to expose it as the tool description.
    try:
        # A fresh search client capped at two results is built per call.
        hits = TavilySearchResults(max_results=2).run(query)
    except Exception as exc:
        # Best-effort: report the failure as data instead of raising.
        return {"error": str(exc)}
    return {"query": query, "results": hits}
30
+
31
+
32
+ # =========================
33
+ # LLM
34
+ # =========================
35
# Chat model instance that run_llm() invokes for every prompt.
llm = ChatOpenAI(model="gpt-4.1-nano", temperature=0.4, streaming=True)
40
+
41
+
42
+ # =========================
43
+ # STATE
44
+ # =========================
45
class MovieState(TypedDict, total=False):
    """State dictionary passed between the workflow nodes.

    Declared with ``total=False``, so every key is optional; each node
    returns only the key(s) it produces.
    """

    # Supplied by the caller when the workflow is invoked.
    title: str
    overview: str

    # Written by fetch_web_context.
    web_context: str

    # Written by the individual analysis nodes.
    key_plot_points: str
    iconic_moments: str
    themes: str
    interesting_facts: str
    songs: str
    trailer: str

    # Written by generate_summary.
    summary: str
56
+
57
+
58
+ # =========================
59
+ # NODE: FETCH WEB CONTEXT
60
+ # =========================
61
def fetch_web_context(state: MovieState):
    """Look up web information about the movie and store it as text.

    Reads ``state["title"]``, runs the ``tavily_search`` tool with a short
    keyword query, and returns the stringified tool output.

    Args:
        state: Workflow state; must contain the "title" key.

    Returns:
        A partial state update ``{"web_context": <str>}``.

    Raises:
        KeyError: If "title" is missing from ``state``.
    """
    title = state["title"]

    # The query is deliberately a compact keyword string rather than a
    # multi-paragraph instruction prompt: the search endpoint is a search
    # engine, not a chat model, and Tavily documents a maximum query length
    # (400 characters at the time of writing -- confirm against the current
    # API reference). An over-long query makes the tool fail, and because
    # tavily_search reports failures as an {"error": ...} dict instead of
    # raising, the failure would otherwise flow silently into every prompt.
    query = (
        f'"{title}" movie official trailer soundtrack songs '
        "trivia facts release reception"
    )

    web = tavily_search.run(query)

    return {"web_context": str(web)}
91
+
92
+ # =========================
93
+ # HELPER PROMPT RUNNER
94
+ # =========================
95
def run_llm(prompt: str) -> str:
    """Send ``prompt`` to the module-level ``llm`` and return the reply's ``content``."""
    response = llm.invoke(prompt)
    return response.content
97
+
98
+
99
+ # =========================
100
+ # ANALYSIS NODES
101
+ # =========================
102
def find_key_points(state: MovieState):
    """Ask the model for the movie's key plot points.

    Reads "title", "overview" and "web_context" from ``state`` and returns
    a partial state update ``{"key_plot_points": <model reply>}``.
    """
    title = state['title']
    overview = state['overview']
    web_context = state['web_context']

    prompt = f"""
    You are a professional movie analyst.

    Movie title: {title}

    Overview:
    {overview}

    Verified web context (may include reviews, trivia, or plot confirmations):
    {web_context}

    Task:
    Extract the MOST IMPORTANT plot points that define the story.

    Guidelines:
    - Focus on STORY EVENTS, not themes or opinions
    - Keep it chronological
    - Avoid unnecessary details or long explanations
    - Do NOT invent scenes not supported by the overview or web context

    Output format (strict):
    - Bullet list
    - 5–7 plot points max
    - Each point: 1 concise sentence
    """
    reply = run_llm(prompt)
    return {"key_plot_points": reply}
129
+
130
+
131
def find_iconic_moments(state: MovieState):
    """Ask the model for the movie's most iconic moments.

    Reads "title", "overview" and "web_context" from ``state`` and returns
    a partial state update ``{"iconic_moments": <model reply>}``.
    """
    title = state['title']
    overview = state['overview']
    web_context = state['web_context']

    prompt = f"""
    You are a film analyst identifying ICONIC moments.

    Movie title: {title}

    Overview:
    {overview}

    Verified web context (reviews, trivia, cultural references):
    {web_context}

    Task:
    Identify the most ICONIC moments from the movie.

    Definition of iconic:
    - Scenes that audiences remember most
    - Moments often referenced in reviews, memes, or pop culture
    - Visually, emotionally, or narratively standout scenes

    Guidelines:
    - Do NOT summarize the full plot
    - Avoid repeating basic plot points
    - Focus on memorable SCENES or MOMENTS
    - Base choices on common recognition (not personal opinion)

    Output format (strict):
    - Numbered list
    - 4–6 iconic moments
    - Each item:
    • Scene title (short)
    • One-sentence explanation of why it’s iconic
    """
    reply = run_llm(prompt)
    return {"iconic_moments": reply}
165
+
166
def find_themes(state: MovieState):
    """Ask the model for the movie's core themes.

    Reads "title", "overview" and "web_context" from ``state`` and returns
    a partial state update ``{"themes": <model reply>}``.
    """
    title = state['title']
    overview = state['overview']
    web_context = state['web_context']

    prompt = f"""
    You are a movie analyst focusing on THEMES.

    Movie title: {title}

    Overview:
    {overview}

    Verified web context (critical analysis, reviews, commentary):
    {web_context}

    Task:
    Identify the CORE THEMES explored in the movie.

    Guidelines:
    - Themes should be CONCEPTS (not plot points or morals)
    - Avoid vague words like "life" or "journey" unless specific
    - Base themes on story events and critical interpretation
    - Do NOT over-explain

    Output format (strict):
    - Bullet list
    - 3–5 themes only
    - Each theme format:
    **Theme name** – one concise explanatory sentence
    """
    reply = run_llm(prompt)
    return {"themes": reply}
194
+
195
def find_interesting_facts(state: MovieState):
    """Ask the model for trivia and lesser-known facts about the movie.

    Reads "title", "overview" and "web_context" from ``state`` and returns
    a partial state update ``{"interesting_facts": <model reply>}``.
    """
    title = state['title']
    overview = state['overview']
    web_context = state['web_context']

    prompt = f"""
    You are a movie researcher collecting VERIFIED trivia.

    Movie title: {title}

    Overview:
    {overview}

    Verified web context (interviews, trivia, production notes, reviews):
    {web_context}

    Task:
    Extract interesting and lesser-known facts about the movie.

    Guidelines:
    - Facts must be BASED on the web context or widely known sources
    - Avoid speculation or unverified claims
    - Focus on production, casting, behind-the-scenes, or reception
    - Do NOT repeat plot points

    Output format (strict):
    - Bullet list
    - 4–6 facts
    - Each fact:
    • One concise sentence
    • Clearly factual (no opinions)
    """
    reply = run_llm(prompt)
    return {"interesting_facts": reply}
224
+
225
def find_songs(state: MovieState):
    """Ask the model for the movie's official soundtrack songs.

    Reads "title" and "web_context" from ``state`` and returns a partial
    state update ``{"songs": <model reply>}``.
    """
    title = state['title']
    web_context = state['web_context']

    prompt = f"""
    You are extracting OFFICIAL soundtrack information.

    Movie title: {title}

    Verified web context (soundtrack listings, music platforms, official sources):
    {web_context}

    Task:
    Identify the official soundtrack songs associated with this movie.

    Rules:
    - Include ONLY officially released songs (not background score unless famous)
    - Prefer reliable sources (Spotify, YouTube, Apple Music, IMDb soundtrack)
    - Do NOT guess or invent songs
    - Do NOT add explanations or extra text

    Output format (STRICT — follow exactly):
    - One song per line
    - Each line format:
    [song name, official link]

    If no reliable song information is found:
    - Return an empty list: []
    """
    reply = run_llm(prompt)
    return {"songs": reply}
252
+
253
+
254
def find_trailer(state: MovieState):
    """Ask the model for the movie's official trailer links.

    Reads "title" and "web_context" from ``state`` and returns a partial
    state update ``{"trailer": <model reply>}``.
    """
    title = state['title']
    web_context = state['web_context']

    prompt = f"""
    You are retrieving OFFICIAL movie trailer information.

    Movie title: {title}

    Verified web context (official YouTube channels, studio pages, IMDb, Wikipedia):
    {web_context}

    Task:
    Find official trailer links for this movie.

    Rules:
    - ONLY official trailers (no fan edits, reactions, reviews)
    - Prefer studio or verified YouTube channels
    - Do NOT invent or approximate links
    - Do NOT include commentary or descriptions

    Output format (STRICT — follow exactly):
    - One trailer per line
    - Each line format:
    [trailer name, official link]

    If no official trailer is found:
    - Return an empty list: []
    """
    reply = run_llm(prompt)
    return {"trailer": reply}
281
+
282
+
283
+ # =========================
284
+ # FINAL SUMMARY
285
+ # =========================
286
def generate_summary(state: MovieState):
    """Combine the analysis results into one JSON-formatted summary.

    Reads "title", "key_plot_points", "iconic_moments", "themes",
    "interesting_facts", "songs" and "trailer" from ``state``, asks the
    model to merge them into a JSON object following the schema embedded in
    the prompt, and returns the raw model reply.

    Args:
        state: Workflow state holding the outputs of the analysis nodes.

    Returns:
        A partial state update ``{"summary": <model reply>}``. The reply is
        not parsed or validated here.

    Raises:
        KeyError: If any of the keys listed above is missing from ``state``.
    """
    # The overview hint is spelled as an explicit range ("4–7 sentence"),
    # matching the other ranges used in this file's prompts; without the
    # dash it reads as "47 sentence".
    prompt = f"""
    You are generating a FINAL movie summary for a frontend application.

    Movie title: {state['title']}

    Use ONLY the information provided below.
    Do NOT add new facts.
    Do NOT use markdown.
    Do NOT include extra text.

    INPUT DATA
    ---------

    KEY PLOT POINTS:
    {state['key_plot_points']}

    ICONIC MOMENTS:
    {state['iconic_moments']}

    THEMES:
    {state['themes']}

    INTERESTING FACTS:
    {state['interesting_facts']}

    SONGS:
    {state['songs']}

    TRAILERS:
    {state['trailer']}

    ---------

    TASK:
    Return a VALID JSON object that follows this schema EXACTLY.

    JSON SCHEMA (STRICT):
    {{
    "overview": "4–7 sentence high-level movie overview",
    "key_moments": ["moment 1", "moment 2", "moment 3", "moment 4"],
    "themes": ["theme 1", "theme 2", "theme 3"],
    "notable_facts": ["fact 1", "fact 2", "fact 3"],
    "soundtrack_highlights": ["song name 1", "song name 2"],
    "official_trailer": "trailer name"
    }}
    """
    return {"summary": run_llm(prompt)}
334
+
335
+
336
+
337
+ # =========================
338
+ # GRAPH
339
+ # =========================
340
# Workflow graph over the shared MovieState.
graph = StateGraph(MovieState)

# Analysis steps: each one gets an edge from "fetch_web_context" and an
# edge into "generate_summary".
_ANALYSIS_NODES = (
    ("find_key_points", find_key_points),
    ("find_iconic_moments", find_iconic_moments),
    ("find_themes", find_themes),
    ("find_interesting_facts", find_interesting_facts),
    ("find_songs", find_songs),
    ("find_trailer", find_trailer),
)

# Register nodes: web lookup first, then the analysis steps, then the summary.
graph.add_node("fetch_web_context", fetch_web_context)
for _node_name, _node_fn in _ANALYSIS_NODES:
    graph.add_node(_node_name, _node_fn)
graph.add_node("generate_summary", generate_summary)

# Entry edge.
graph.add_edge(START, "fetch_web_context")

# Edges from the web lookup to every analysis step.
for _node_name, _node_fn in _ANALYSIS_NODES:
    graph.add_edge("fetch_web_context", _node_name)

# Edges from every analysis step to the summary.
for _node_name, _node_fn in _ANALYSIS_NODES:
    graph.add_edge(_node_name, "generate_summary")

# Exit edge.
graph.add_edge("generate_summary", END)

workflow = graph.compile()
370
+
371
def summarise_movie(title: str, overview: str):
    """Run the compiled workflow for one movie and return the parsed summary.

    Args:
        title: Movie title; placed in the initial state under "title".
        overview: Movie overview text; placed under "overview".

    Returns:
        The object produced by ``json.loads`` from the workflow's "summary"
        value.

    Raises:
        ValueError: If the summary text cannot be decoded as JSON. The
            underlying ``json.JSONDecodeError`` is chained as the cause.
    """
    result = workflow.invoke({
        "title": title,
        "overview": overview
    })

    raw_summary = result["summary"].strip()

    # Chat models often wrap JSON in a Markdown code fence (``` or ```json)
    # even when told not to use markdown; peel the fence off before parsing
    # so an otherwise valid reply is not rejected.
    if raw_summary.startswith("```"):
        first_newline = raw_summary.find("\n")
        if first_newline != -1:
            # Drop the whole opening fence line, including any language tag.
            raw_summary = raw_summary[first_newline + 1:]
        else:
            raw_summary = raw_summary[3:]
        raw_summary = raw_summary.rstrip()
        if raw_summary.endswith("```"):
            raw_summary = raw_summary[:-3]

    try:
        return json.loads(raw_summary)
    except json.JSONDecodeError as err:
        # Keep the public exception type, but preserve the parser's
        # position/message for debugging.
        raise ValueError("LLM returned invalid JSON") from err
383
+
384
+
385
  #print(summarise_movie("Jumanji", "Four teenagers are sucked into a magical video game..."))