sbs-API

Build error

App Files Files Community

rairo committed on Jul 19, 2025

Commit

9baa9d2

verified ·

1 Parent(s): 42b2c37

Update sozo_gen.py

Browse files

Files changed (1) hide show

sozo_gen.py +4 -4

sozo_gen.py CHANGED Viewed

@@ -45,7 +45,7 @@ def load_dataframe_safely(buf, name: str):
 def deepgram_tts(txt: str, voice_model: str):
     DG_KEY = os.getenv("DEEPGRAM_API_KEY")
     if not DG_KEY or not txt: return None
-    txt = re.sub(r"[^\w\s.,!?;:-]", "", txt)[:1000]
     try:
         r = requests.post("https://api.deepgram.com/v1/speak", params={"model": voice_model}, headers={"Authorization": f"Token {DG_KEY}", "Content-Type": "application/json"}, json={"text": txt}, timeout=30)
         r.raise_for_status()
@@ -69,7 +69,7 @@ extract_chart_tags = lambda t: list( dict.fromkeys(m.group("d").strip() for m in
 re_scene = re.compile(r"^\s*scene\s*\d+[:.\- ]*", re.I | re.M)
 def clean_narration(txt: str) -> str:
     txt = TAG_RE.sub("", txt); txt = re_scene.sub("", txt)
-    phrases_to_remove = [r"as you can see in the chart", r"this chart shows", r"the chart illustrates", r"in this visual", r"this graph displays"]
     for phrase in phrases_to_remove: txt = re.sub(phrase, "", txt, flags=re.IGNORECASE)
     txt = re.sub(r"\s*\([^)]*\)", "", txt); txt = re.sub(r"[\*#_]", "", txt)
     return re.sub(r"\s{2,}", " ", txt).strip()
@@ -604,7 +604,7 @@ def generate_report_draft(buf, name: str, ctx: str, uid: str, project_id: str, b
     logging.info(f"Generating autonomous report draft for project {project_id}")
     df = load_dataframe_safely(buf, name)
-    llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", google_api_key=API_KEY, temperature=0.1)
     # Build enhanced context with AI intelligence
     ctx_dict = {"shape": df.shape, "columns": list(df.columns), "user_ctx": ctx}
@@ -965,7 +965,7 @@ def generate_single_chart(df: pd.DataFrame, description: str, uid: str, project_
 def generate_video_from_project(df: pd.DataFrame, raw_md: str, uid: str, project_id: str, voice_model: str, bucket):
     logging.info(f"Generating video for project {project_id} with voice {voice_model}")
-    llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", google_api_key=API_KEY, temperature=0.2)
     story_prompt = f"Based on the following report, create a script for a {VIDEO_SCENES}-scene video. Each scene must be separated by '[SCENE_BREAK]' and contain narration and one chart tag. Report: {raw_md}. only output the script no quips"
     script = llm.invoke(story_prompt).content
     scenes = [s.strip() for s in script.split("[SCENE_BREAK]") if s.strip()]

 def deepgram_tts(txt: str, voice_model: str):
     DG_KEY = os.getenv("DEEPGRAM_API_KEY")
     if not DG_KEY or not txt: return None
+    txt = re.sub(r"[^\w\s.,!?;:-]", "", txt)
     try:
         r = requests.post("https://api.deepgram.com/v1/speak", params={"model": voice_model}, headers={"Authorization": f"Token {DG_KEY}", "Content-Type": "application/json"}, json={"text": txt}, timeout=30)
         r.raise_for_status()
 re_scene = re.compile(r"^\s*scene\s*\d+[:.\- ]*", re.I | re.M)
 def clean_narration(txt: str) -> str:
     txt = TAG_RE.sub("", txt); txt = re_scene.sub("", txt)
+    phrases_to_remove = [r"chart tag", r"chart_tag", r"narration"]
     for phrase in phrases_to_remove: txt = re.sub(phrase, "", txt, flags=re.IGNORECASE)
     txt = re.sub(r"\s*\([^)]*\)", "", txt); txt = re.sub(r"[\*#_]", "", txt)
     return re.sub(r"\s{2,}", " ", txt).strip()
     logging.info(f"Generating autonomous report draft for project {project_id}")
     df = load_dataframe_safely(buf, name)
+    llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash", google_api_key=API_KEY, temperature=0.1)
     # Build enhanced context with AI intelligence
     ctx_dict = {"shape": df.shape, "columns": list(df.columns), "user_ctx": ctx}
 def generate_video_from_project(df: pd.DataFrame, raw_md: str, uid: str, project_id: str, voice_model: str, bucket):
     logging.info(f"Generating video for project {project_id} with voice {voice_model}")
+    llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash", google_api_key=API_KEY, temperature=0.2)
     story_prompt = f"Based on the following report, create a script for a {VIDEO_SCENES}-scene video. Each scene must be separated by '[SCENE_BREAK]' and contain narration and one chart tag. Report: {raw_md}. only output the script no quips"
     script = llm.invoke(story_prompt).content
     scenes = [s.strip() for s in script.split("[SCENE_BREAK]") if s.strip()]