rairo committed on
Commit
9baa9d2
·
verified ·
1 Parent(s): 42b2c37

Update sozo_gen.py

Browse files
Files changed (1) hide show
  1. sozo_gen.py +4 -4
sozo_gen.py CHANGED
@@ -45,7 +45,7 @@ def load_dataframe_safely(buf, name: str):
45
  def deepgram_tts(txt: str, voice_model: str):
46
  DG_KEY = os.getenv("DEEPGRAM_API_KEY")
47
  if not DG_KEY or not txt: return None
48
- txt = re.sub(r"[^\w\s.,!?;:-]", "", txt)[:1000]
49
  try:
50
  r = requests.post("https://api.deepgram.com/v1/speak", params={"model": voice_model}, headers={"Authorization": f"Token {DG_KEY}", "Content-Type": "application/json"}, json={"text": txt}, timeout=30)
51
  r.raise_for_status()
@@ -69,7 +69,7 @@ extract_chart_tags = lambda t: list( dict.fromkeys(m.group("d").strip() for m in
69
  re_scene = re.compile(r"^\s*scene\s*\d+[:.\- ]*", re.I | re.M)
70
  def clean_narration(txt: str) -> str:
71
  txt = TAG_RE.sub("", txt); txt = re_scene.sub("", txt)
72
- phrases_to_remove = [r"as you can see in the chart", r"this chart shows", r"the chart illustrates", r"in this visual", r"this graph displays"]
73
  for phrase in phrases_to_remove: txt = re.sub(phrase, "", txt, flags=re.IGNORECASE)
74
  txt = re.sub(r"\s*\([^)]*\)", "", txt); txt = re.sub(r"[\*#_]", "", txt)
75
  return re.sub(r"\s{2,}", " ", txt).strip()
@@ -604,7 +604,7 @@ def generate_report_draft(buf, name: str, ctx: str, uid: str, project_id: str, b
604
  logging.info(f"Generating autonomous report draft for project {project_id}")
605
 
606
  df = load_dataframe_safely(buf, name)
607
- llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", google_api_key=API_KEY, temperature=0.1)
608
 
609
  # Build enhanced context with AI intelligence
610
  ctx_dict = {"shape": df.shape, "columns": list(df.columns), "user_ctx": ctx}
@@ -965,7 +965,7 @@ def generate_single_chart(df: pd.DataFrame, description: str, uid: str, project_
965
 
966
  def generate_video_from_project(df: pd.DataFrame, raw_md: str, uid: str, project_id: str, voice_model: str, bucket):
967
  logging.info(f"Generating video for project {project_id} with voice {voice_model}")
968
- llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", google_api_key=API_KEY, temperature=0.2)
969
  story_prompt = f"Based on the following report, create a script for a {VIDEO_SCENES}-scene video. Each scene must be separated by '[SCENE_BREAK]' and contain narration and one chart tag. Report: {raw_md}. only output the script no quips"
970
  script = llm.invoke(story_prompt).content
971
  scenes = [s.strip() for s in script.split("[SCENE_BREAK]") if s.strip()]
 
45
  def deepgram_tts(txt: str, voice_model: str):
46
  DG_KEY = os.getenv("DEEPGRAM_API_KEY")
47
  if not DG_KEY or not txt: return None
48
+ txt = re.sub(r"[^\w\s.,!?;:-]", "", txt)
49
  try:
50
  r = requests.post("https://api.deepgram.com/v1/speak", params={"model": voice_model}, headers={"Authorization": f"Token {DG_KEY}", "Content-Type": "application/json"}, json={"text": txt}, timeout=30)
51
  r.raise_for_status()
 
69
  re_scene = re.compile(r"^\s*scene\s*\d+[:.\- ]*", re.I | re.M)
70
  def clean_narration(txt: str) -> str:
71
  txt = TAG_RE.sub("", txt); txt = re_scene.sub("", txt)
72
+ phrases_to_remove = [r"chart tag", r"chart_tag", r"narration"]
73
  for phrase in phrases_to_remove: txt = re.sub(phrase, "", txt, flags=re.IGNORECASE)
74
  txt = re.sub(r"\s*\([^)]*\)", "", txt); txt = re.sub(r"[\*#_]", "", txt)
75
  return re.sub(r"\s{2,}", " ", txt).strip()
 
604
  logging.info(f"Generating autonomous report draft for project {project_id}")
605
 
606
  df = load_dataframe_safely(buf, name)
607
+ llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash", google_api_key=API_KEY, temperature=0.1)
608
 
609
  # Build enhanced context with AI intelligence
610
  ctx_dict = {"shape": df.shape, "columns": list(df.columns), "user_ctx": ctx}
 
965
 
966
  def generate_video_from_project(df: pd.DataFrame, raw_md: str, uid: str, project_id: str, voice_model: str, bucket):
967
  logging.info(f"Generating video for project {project_id} with voice {voice_model}")
968
+ llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash", google_api_key=API_KEY, temperature=0.2)
969
  story_prompt = f"Based on the following report, create a script for a {VIDEO_SCENES}-scene video. Each scene must be separated by '[SCENE_BREAK]' and contain narration and one chart tag. Report: {raw_md}. only output the script no quips"
970
  script = llm.invoke(story_prompt).content
971
  scenes = [s.strip() for s in script.split("[SCENE_BREAK]") if s.strip()]