rairo committed on
Commit
b47e518
·
verified ·
1 Parent(s): 0ae7617

Update stories.py

Browse files
Files changed (1) hide show
  1. stories.py +455 -0
stories.py CHANGED
@@ -0,0 +1,455 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import time
3
+ import tempfile
4
+ import requests
5
+ import json
6
+ from google import genai
7
+ from google.genai import types
8
+ import google.generativeai as genai
9
+ import io
10
+ import base64
11
+ import numpy as np
12
+ import cv2
13
+ import logging
14
+ import uuid
15
+ import subprocess
16
+ from pathlib import Path
17
+ import wikipedia # using the PyPI wikipedia package
18
+ import urllib.parse
19
+ import pandas as pd
20
+ from PyPDF2 import PdfReader
21
+ import plotly.graph_objects as go
22
+ import matplotlib.pyplot as plt
23
+ from langchain_google_genai import ChatGoogleGenerativeAI
24
+ # For PandasAI using a single dataframe
25
+ from pandasai import SmartDataframe
26
+ from pandasai.responses.response_parser import ResponseParser
27
+ #from langchain_community.chat_models.sambanova import ChatSambaNovaCloud
28
+ from pandasai.exceptions import InvalidOutputValueMismatch
29
+ import base64
30
+ import os
31
+ import uuid
32
+ import matplotlib
33
+ import matplotlib.pyplot as plt
34
+ from io import BytesIO
35
+ import dataframe_image as dfi
36
+ import uuid
37
+ from supadata import Supadata, SupadataError
38
+ from PIL import ImageFont, ImageDraw, Image
39
+ import seaborn as sns
40
+ from flask import jsonify
41
+
42
+
43
+ # -----------------------
44
+ # Configuration and Logging
45
+ # -----------------------
46
# Root logging is configured at INFO; this module logs through `logger`.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# A random UUID, generated once when the module is imported, names the
# directory handed to PandasAI as its chart export path.
guid = uuid.uuid4()
new_filename = str(guid)
user_defined_path = os.path.join("/exports/charts", new_filename)
52
+
53
class FlaskResponse(ResponseParser):
    """PandasAI response parser that renders results as web-friendly strings.

    DataFrames are returned as HTML, plots as base64 PNG data URIs, and
    everything else as ``str(value)``.
    """

    def __init__(self, context):
        super().__init__(context)

    def format_dataframe(self, result):
        """Return the DataFrame stored in ``result["value"]`` as an HTML table."""
        frame = result["value"]
        return frame.to_html()

    def format_plot(self, result):
        """Return the plot in ``result["value"]`` as a ``data:image/png`` URI.

        Objects exposing ``savefig`` are rendered in memory; a string naming
        an existing file is read from disk. Anything else (or a figure that
        fails to render) is returned as ``str(value)``.
        """
        plot = result["value"]

        # Figure-like object: render it to an in-memory PNG.
        if hasattr(plot, "savefig"):
            try:
                png_buffer = io.BytesIO()
                plot.savefig(png_buffer, format="png")
                png_buffer.seek(0)
                encoded = base64.b64encode(png_buffer.read()).decode("utf-8")
            except Exception as e:
                print("Error processing figure:", e)
                return str(plot)
            return f"data:image/png;base64,{encoded}"

        # Path to an existing file: inline its bytes.
        if isinstance(plot, str) and os.path.isfile(plot):
            print("My image path:", plot)
            with open(plot, "rb") as image_file:
                raw_bytes = image_file.read()
            encoded = base64.b64encode(raw_bytes).decode("utf-8")
            return f"data:image/png;base64,{encoded}"

        # Fallback: plain string representation.
        return str(plot)

    def format_other(self, result):
        """Return any non-DataFrame, non-plot value as a string."""
        value = result["value"]
        return str(value)
87
+
88
+
89
+
90
+
91
+
92
+
93
# Gemini chat model (through LangChain) intended for PandasAI.
_PANDASAI_LLM_SETTINGS = {
    "model": "gemini-2.0-flash-thinking-exp",
    "temperature": 0,
    "max_tokens": None,
    "timeout": 1000,
    "max_retries": 2,
}
llm1 = ChatGoogleGenerativeAI(**_PANDASAI_LLM_SETTINGS)

# Supadata client; the API key comes from the SUPADATA environment variable.
# When the variable is unset the client receives the literal string "None".
SUPADATA = os.getenv('SUPADATA')
supadata = Supadata(api_key=str(SUPADATA))

# -----------------------
# Utility Constants
# -----------------------
# Approximate token limit: 50,000 tokens ~ 200,000 characters
MAX_CHARACTERS = 200_000
109
+
110
def configure_gemini(api_key):
    """Configure the Gemini SDK with ``api_key`` and return a generative model.

    Any exception raised while configuring or constructing the model is
    logged and then re-raised unchanged.
    """
    try:
        genai.configure(api_key=api_key)
        gemini_model = genai.GenerativeModel('gemini-2.0-flash-thinking-exp')
    except Exception as e:
        logger.error(f"Error configuring Gemini: {str(e)}")
        raise
    return gemini_model
117
+
118
# Initialize Gemini model for story generation.
# GOOGLE_API_KEY was referenced without ever being defined, so importing this
# module raised NameError. Read it from the environment instead; a missing
# variable now fails with a KeyError that names it.
# NOTE(review): assumes the key is supplied via the GOOGLE_API_KEY environment
# variable — confirm against the deployment configuration.
GOOGLE_API_KEY = os.environ["GOOGLE_API_KEY"]
model = configure_gemini(GOOGLE_API_KEY)
os.environ["GEMINI_API_KEY"] = GOOGLE_API_KEY
121
+
122
+ # -----------------------
123
+ # File Upload Helpers
124
+ # -----------------------
125
def get_pdf_text(pdf_file):
    """Extract text from a PDF file, capped at ``MAX_CHARACTERS`` characters.

    Args:
        pdf_file: Anything ``PdfReader`` accepts.

    Returns:
        The text of the pages, each followed by a newline, truncated to
        ``MAX_CHARACTERS``. Pages with no extractable text are skipped.
    """
    chunks = []
    collected = 0
    for page in PdfReader(pdf_file).pages:
        page_text = page.extract_text()
        if not page_text:
            continue
        chunks.append(page_text + "\n")
        collected += len(page_text) + 1
        # Stop as soon as the cap is reached: the remaining pages would be
        # discarded by the final slice anyway.
        if collected >= MAX_CHARACTERS:
            break
    return "".join(chunks)[:MAX_CHARACTERS]
136
+
137
+
138
+ # -----------------------
139
+ # Audio Transcription
140
+ # -----------------------
141
+
142
def _extract_deepgram_transcript(data):
    """Pull the transcript string out of a Deepgram ``/v1/listen`` JSON body."""
    try:
        return data["results"]["channels"][0]["alternatives"][0]["transcript"]
    except (KeyError, IndexError, TypeError):
        # Unexpected shape: fall back to a top-level "text" field if present.
        return data.get("text", "") if isinstance(data, dict) else ""


def transcribe_audio(audio_file):
    """
    Transcribe audio using DeepGram's API (model: nova-3).
    Expects a WAV audio file.

    Args:
        audio_file: Binary file-like object; only ``read()`` is called on it.

    Returns:
        The transcript string (possibly empty), or None when the API key is
        missing, the API answers with a non-200 status, or any exception
        occurs during the request.
    """
    deepgram_api_key = os.getenv("DeepGram")
    if not deepgram_api_key:
        # Errors go to the module logger; `st` is not defined in this module.
        logger.error("DeepGram API Key is missing. Please set the 'DeepGram' environment variable.")
        return None

    headers_transcribe = {
        "Authorization": f"Token {deepgram_api_key}",
        "Content-Type": "audio/wav",
    }
    url = "https://api.deepgram.com/v1/listen?model=nova-3"
    try:
        audio_bytes = audio_file.read()
        response = requests.post(url, headers=headers_transcribe, data=audio_bytes)
        if response.status_code != 200:
            logger.error(f"Deepgram transcription error: {response.status_code}")
            return None
        # The transcript is nested under results.channels[].alternatives[],
        # not in a top-level "text" key.
        return _extract_deepgram_transcript(response.json())
    except Exception as e:
        logger.error(f"Error during transcription: {e}")
        return None
169
+
170
+ # -----------------------
171
+ # PandasAI Response for DataFrame (using SmartDataframe and the Gemini chat model)
172
+ # -----------------------
173
def generateResponse(prompt, df):
    """Answer a natural-language question about a DataFrame with PandasAI.

    Args:
        prompt: The user's question, passed to ``SmartDataframe.chat``.
        df: The pandas DataFrame to query.

    Returns:
        The result of ``jsonify({"answer": ...})``, where the answer is an
        HTML table for a DataFrame result, a base64 PNG data URI for a
        matplotlib figure, and ``str(result)`` for anything else.
    """
    pandas_agent = SmartDataframe(
        df,
        config={
            # `llm1` is the module-level Gemini chat model; the original
            # referenced an undefined name `llm`.
            "llm": llm1,
            "response_parser": FlaskResponse,
            "custom_whitelisted_dependencies": [
                "os",
                "io",
                "sys",
                "chr",
                "glob",
                "b64decoder",
                "collections",
                "geopy",
                "geopandas",
                "wordcloud",
                "builtins",
            ],
            "security": "none",
            "save_charts_path": user_defined_path,
            "save_charts": False,
            "enable_cache": False,
        },
    )

    # Ask the question that was actually passed in (the original used an
    # undefined `user_question`).
    answer = pandas_agent.chat(prompt)

    # Normalise the answer to a string the client can render.
    if isinstance(answer, pd.DataFrame):
        formatted_answer = answer.to_html()
    elif isinstance(answer, plt.Figure):
        buf = io.BytesIO()
        answer.savefig(buf, format="png")
        buf.seek(0)
        image_base64 = base64.b64encode(buf.read()).decode("utf-8")
        formatted_answer = f"data:image/png;base64,{image_base64}"
    else:
        formatted_answer = str(answer)

    # Return the formatted answer as JSON.
    return jsonify({"answer": formatted_answer})
219
+
220
+ # -----------------------
221
+ # DataFrame-Based Story Generation (for CSV/Excel files)
222
+ # -----------------------
223
def generate_story_from_dataframe(df, story_type):
    """
    Generate a data-based story from a CSV/Excel file.

    The dataframe is converted to a JSON string and embedded in a prompt that
    instructs the model to produce exactly 5 sections separated by ``[break]``.
    Each section includes a brief analysis and a chart description inside <>.

    Args:
        df: The pandas DataFrame to narrate.
        story_type: One of "free_form", "children", "education", "business",
            "entertainment"; unknown values fall back to "free_form".

    Returns:
        A string of exactly 5 sections joined by ``[break]`` (padded with
        "(Placeholder section)" or trimmed as needed), or None when the model
        returns nothing or any error occurs.
    """
    dataset_intro = " Using the following dataset in JSON format: "
    # (persona, instruction) per story type; only the selected pair is
    # expanded, so the dataset JSON is embedded once rather than five times.
    templates = {
        "free_form": (
            "You are a professional storyteller.",
            ", create an engaging and concise story. ",
        ),
        "children": (
            "You are a professional storyteller writing stories for children.",
            ", create a fun, factual, and concise story appropriate for children. ",
        ),
        "education": (
            "You are a professional storyteller writing educational content.",
            ", create an informative, engaging, and concise educational story. Include interesting facts while keeping it engaging. ",
        ),
        "business": (
            "You are a professional storyteller specializing in business narratives.",
            ", create a professional, concise business story with practical insights. ",
        ),
        "entertainment": (
            "You are a professional storyteller writing creative entertaining stories.",
            ", create an engaging and concise entertaining story. Include interesting facts while keeping it engaging. ",
        ),
    }
    persona, instruction = templates.get(story_type, templates["free_form"])

    try:
        # default=str keeps cells that are not JSON-native (timestamps, etc.)
        # from aborting serialisation; the JSON is only prompt text.
        df_json = json.dumps(df.to_dict(), default=str)
        full_prompt = (
            persona + dataset_intro + df_json + instruction +
            "Write a story for a narrator meaning no labels of pages or sections the story should just flow. Divide your story into exactly 5 short and concise sections separated by [break]. " +
            "For each section, provide a brief narrative analysis and include, within angle brackets <>, a clear and plain-text description of a chart visualization that would represent the data. " +
            "Limit the descriptions by specifying only charts. " +
            "Ensure that your response contains only natural language descriptions examples: 'bar chart of', 'pie chart of' , 'histogram of', 'scatterplot of', 'boxplot of' etc and nothing else."
        )

        response = model.generate_content(full_prompt)
        if not response or not response.text:
            return None

        # Ensure exactly 5 non-empty sections.
        sections = [s.strip() for s in response.text.split("[break]") if s.strip()]
        if len(sections) < 5:
            sections += ["(Placeholder section)"] * (5 - len(sections))
        else:
            sections = sections[:5]

        return "[break]".join(sections)

    except Exception as e:
        # Errors go to the module logger; `st` is not defined in this module.
        logger.error(f"Error generating story from dataframe: {e}")
        return None
271
+
272
+
273
+ # -----------------------
274
+ # Existing Story Generation Functions (Text, Wikipedia, Bible, Youtube(new))
275
+ # -----------------------
276
def generate_story_from_text(prompt_text, story_type):
    """Generate a five-section narrated story from a free-text prompt.

    ``story_type`` selects the persona and instruction; unknown values fall
    back to "free_form". Returns the model's response text, or None when the
    model returns a falsy response.
    """
    based_on = "Based on the prompt: "
    storyteller = "You are a professional storyteller. "
    # (opening, closing instruction) wrapped around the user's prompt text.
    templates = {
        "free_form": (storyteller + based_on, ", create an engaging and concise story. "),
        "children": ("You are a professional storyteller for children. " + based_on, ", create a fun and concise story. "),
        "education": (storyteller + based_on, ", create an educational and engaging story. "),
        "business": (storyteller + based_on, ", create a professional business story. "),
        "entertainment": (storyteller + based_on, ", create an entertaining and concise story. "),
    }
    opening, closing = templates.get(story_type, templates["free_form"])
    story_prompt = opening + prompt_text + closing

    structure_instructions = (
        "Write a short story for a narrator meaning no labels of pages or sections the story should just flow and narrated in 2 minutes or less. Divide your story into exactly 5 sections separated by [break]. For each section, include an image description inside <>."
    )
    response = model.generate_content(story_prompt + structure_instructions)
    if not response:
        return None
    return response.text
290
+
291
def generate_story_from_wiki(wiki_url, story_type):
    """Generate a five-section narrated story from a Wikipedia article summary.

    Args:
        wiki_url: URL of an English Wikipedia article (``.../wiki/<Title>`` or
            ``...?title=<Title>``).
        story_type: One of "free_form", "children", "education", "business",
            "entertainment"; unknown values fall back to "free_form".

    Returns:
        The model's response text, or None when the model returns a falsy
        response or any error occurs (the error is logged).
    """
    try:
        parsed = urllib.parse.urlparse(wiki_url)
        # Prefer an explicit ?title= parameter; otherwise use the last path
        # segment. urlparse has already dropped any query string or fragment.
        title_param = urllib.parse.parse_qs(parsed.query).get("title")
        raw_title = title_param[0] if title_param else parsed.path.rstrip("/").split("/")[-1]
        # URL titles are percent-encoded with underscores for spaces.
        page_title = urllib.parse.unquote(raw_title).replace("_", " ")

        wikipedia.set_lang("en")
        # The title comes straight from the URL, so disable search suggestions
        # that could resolve to a different article.
        page = wikipedia.page(page_title, auto_suggest=False)
        wiki_text = page.summary

        source_intro = "Using the following Wikipedia info: "
        storyteller = "You are a professional storyteller. "
        templates = {
            "free_form": (storyteller, ", create an engaging and concise story. "),
            "children": ("You are a professional storyteller for children. ", ", create a fun and concise story. "),
            "education": (storyteller, ", create an educational and engaging story. "),
            "business": (storyteller, ", create a professional business story. "),
            "entertainment": (storyteller, ", create an entertaining and concise story. "),
        }
        persona, instruction = templates.get(story_type, templates["free_form"])
        story_prompt = persona + source_intro + wiki_text + instruction

        response = model.generate_content(
            story_prompt +
            "Write a short story for a narrator meaning no labels of pages or sections the story should just flow and narrated in 2 minutes or less. Divide your story into exactly 5 sections separated by [break]. For each section, include an image description inside <>."
        )
        return response.text if response else None
    except Exception as e:
        # Errors go to the module logger; `st` is not defined in this module.
        logger.error(f"Error generating story from Wikipedia: {e}")
        return None
318
+
319
def fetch_bible_text(reference):
    """Fetch ASV Bible text for a reference such as 'Genesis 1:1-5' or 'Psalms 23'.

    Args:
        reference: "<book> <chapter>", optionally followed by ":<verse>" or
            ":<first>-<last>".

    Returns:
        The verse texts joined by spaces. For verse references, a verse that
        cannot be fetched contributes a bracketed error marker instead of
        aborting the whole request. Returns None when the reference does not
        parse or a whole-chapter fetch fails (the problem is logged).
    """
    m = re.match(r"(?P<book>[1-3]?\s*\w+(?:\s+\w+)*)\s+(?P<chapter>\d+)(?::(?P<verse_start>\d+)(?:-(?P<verse_end>\d+))?)?", reference)
    if not m:
        # Errors go to the module logger; `st` is not defined in this module.
        logger.error("Bible reference format invalid. Use format like 'Genesis 1:1-5' or 'Psalms 23'.")
        return None

    # The URL uses a lower-cased book name with spaces removed.
    book = m.group("book").strip().lower().replace(" ", "")
    chapter = m.group("chapter")
    verse_start = m.group("verse_start")
    verse_end = m.group("verse_end")
    chapter_url = f"https://cdn.jsdelivr.net/gh/wldeh/bible-api/bibles/en-asv/books/{book}/chapters/{chapter}"

    if verse_start:
        if verse_end is None:
            verse_range = [verse_start]
        else:
            verse_range = [str(v) for v in range(int(verse_start), int(verse_end) + 1)]

        verses_text = []
        for verse in verse_range:
            url = f"{chapter_url}/verses/{verse}.json"
            # Best effort per verse: record a marker and keep going.
            try:
                response = requests.get(url)
                if response.status_code == 200:
                    verses_text.append(response.json().get("text", ""))
                else:
                    verses_text.append(f"[Error fetching verse {verse}]")
            except Exception as e:
                verses_text.append(f"[Exception fetching verse {verse}: {e}]")
        return " ".join(verses_text)

    # No verse given: fetch the whole chapter.
    url = f"{chapter_url}.json"
    try:
        response = requests.get(url)
        if response.status_code != 200:
            logger.error("Error fetching chapter text.")
            return None
        data = response.json()
        # The payload is handled as a bare list of verses or as a dict with a
        # "verses" list; any other shape is returned stringified.
        if isinstance(data, list):
            return " ".join(verse.get("text", "") for verse in data)
        if isinstance(data, dict) and "verses" in data:
            return " ".join(verse.get("text", "") for verse in data["verses"])
        return str(data)
    except Exception as e:
        logger.error(f"Exception fetching chapter: {e}")
        return None
366
+
367
def generate_story_from_bible(reference, story_type):
    """Generate a five-section narrated story from a Bible passage.

    The passage is fetched with ``fetch_bible_text``; if that returns None,
    so does this function. ``story_type`` selects the persona and
    instruction, falling back to "free_form" for unknown values. Returns the
    model's response text, or None when the response is falsy.
    """
    bible_text = fetch_bible_text(reference)
    if bible_text is None:
        return None

    source_intro = "Using the following Bible text: "
    storyteller = "You are a professional storyteller. "
    # (opening, closing instruction) wrapped around the passage text.
    templates = {
        "free_form": (storyteller + source_intro, ", create an engaging and concise story. "),
        "children": ("You are a professional storyteller for children. " + source_intro, ", create a fun and concise story. "),
        "education": (storyteller + source_intro, ", create an educational and engaging story. "),
        "business": (storyteller + source_intro, ", create a professional business story. "),
        "entertainment": (storyteller + source_intro, ", create an entertaining and concise story. "),
    }
    opening, closing = templates.get(story_type, templates["free_form"])
    story_prompt = opening + bible_text + closing

    structure_instructions = (
        "Write a short story for a narrator meaning no labels of pages or sections the story should just flow and narrated in 2 minutes or less. Divide your story into exactly 5 sections separated by [break]. For each section, include a brief image description inside <>."
    )
    response = model.generate_content(story_prompt + structure_instructions)
    if not response:
        return None
    return response.text
389
+
390
+
391
def generate_story_from_youtube(youtube_url, story_type):
    """Generate a five-section narrated story from a YouTube video transcript.

    Args:
        youtube_url: A URL containing ``v=<id>`` or ``youtu.be/<id>``.
        story_type: One of "free_form", "children", "education", "business",
            "entertainment"; unknown values fall back to "free_form".

    Returns:
        The model's response text, or None when the model returns a falsy
        response or any error occurs (including an unrecognised URL); the
        error is logged.
    """
    try:
        # Extract video_id from the URL
        if "v=" in youtube_url:
            video_id = youtube_url.split("v=")[1].split("&")[0]
        elif "youtu.be/" in youtube_url:
            video_id = youtube_url.split("youtu.be/")[1].split("?")[0]
        else:
            raise ValueError("Invalid YouTube URL provided.")

        # Request the transcript with text=True.
        # NOTE(review): `.content` is concatenated into the prompt below, so it
        # is presumably a single string here — confirm against the Supadata SDK.
        transcript_res = supadata.youtube.transcript(
            video_id=video_id,
            text=True,
        )
        transcript_text = transcript_res.content

        source_intro = "Using the following YouTube transcript: "
        storyteller = "You are a professional storyteller. "
        templates = {
            "free_form": (storyteller, ", create an engaging and concise story. "),
            "children": ("You are a professional storyteller for children. ", ", create a fun and concise story. "),
            "education": (storyteller, ", create an educational and engaging story. "),
            "business": (storyteller, ", create a professional business story. "),
            "entertainment": (storyteller, ", create an entertaining and concise story. "),
        }
        # Use the provided story_type, defaulting to free_form if not found
        persona, instruction = templates.get(story_type, templates["free_form"])
        story_prompt = persona + source_intro + transcript_text + instruction

        # Append additional instructions for story structure
        full_prompt = story_prompt + (
            "Write a short story for a narrator meaning no labels of pages or sections the story should just flow and narrated in 2 minutes or less. Divide your story into exactly 5 sections separated by [break]. "
            "For each section, include an image description inside <>."
        )

        response = model.generate_content(full_prompt)
        return response.text if response else None

    except Exception as e:
        # Errors go to the module logger; `st` is not defined in this module.
        logger.error(f"Error generating story from YouTube transcript: {e}")
        return None
436
+
437
+ # -----------------------
438
+ # Extract Image Prompts and Story Sections
439
+ # -----------------------
440
def extract_image_prompts_and_story(story_text):
    """Split a generated story into page texts and matching image prompts.

    The story is split on ``[break]``; blank sections are skipped. For each
    section, the first ``<...>`` span becomes the image prompt and every
    ``<...>`` span is removed from the page text. A section with no ``<...>``
    span keeps its full text as the page and gets a fallback prompt built
    from its first 100 characters.

    Args:
        story_text: The model output, or None/empty when generation failed.

    Returns:
        ``(pages, image_prompts)`` — two lists of equal length; both are empty
        when ``story_text`` is None or empty.
    """
    pages = []
    image_prompts = []
    # The story generators return None on failure; treat that as "no pages".
    if not story_text:
        return pages, image_prompts

    # DOTALL lets a description that wraps across lines still match.
    tag_pattern = re.compile(r"<(.*?)>", re.DOTALL)

    for part in story_text.split("[break]"):
        section = part.strip()
        if not section:
            continue
        img_match = tag_pattern.search(part)
        if img_match:
            image_prompts.append(img_match.group(1).strip())
            pages.append(tag_pattern.sub("", part).strip())
        else:
            # Only the image prompt is shortened; the page keeps its full text
            # so the narration is not cut off mid-sentence.
            pages.append(section)
            image_prompts.append(f"A concise illustration of {section[:100]}")
    return pages, image_prompts