Commit: Update sozo_gen.py
File changed: sozo_gen.py (+4 −4)
|
@@ -45,7 +45,7 @@ def load_dataframe_safely(buf, name: str):
|
|
| 45 |
def deepgram_tts(txt: str, voice_model: str):
|
| 46 |
DG_KEY = os.getenv("DEEPGRAM_API_KEY")
|
| 47 |
if not DG_KEY or not txt: return None
|
| 48 |
-
txt = re.sub(r"[^\w\s.,!?;:-]", "", txt)
|
| 49 |
try:
|
| 50 |
r = requests.post("https://api.deepgram.com/v1/speak", params={"model": voice_model}, headers={"Authorization": f"Token {DG_KEY}", "Content-Type": "application/json"}, json={"text": txt}, timeout=30)
|
| 51 |
r.raise_for_status()
|
|
@@ -69,7 +69,7 @@ extract_chart_tags = lambda t: list( dict.fromkeys(m.group("d").strip() for m in
|
|
| 69 |
re_scene = re.compile(r"^\s*scene\s*\d+[:.\- ]*", re.I | re.M)
|
| 70 |
def clean_narration(txt: str) -> str:
|
| 71 |
txt = TAG_RE.sub("", txt); txt = re_scene.sub("", txt)
|
| 72 |
-
phrases_to_remove = [r"… [line truncated in diff capture — original list contents not recoverable]
|
| 73 |
for phrase in phrases_to_remove: txt = re.sub(phrase, "", txt, flags=re.IGNORECASE)
|
| 74 |
txt = re.sub(r"\s*\([^)]*\)", "", txt); txt = re.sub(r"[\*#_]", "", txt)
|
| 75 |
return re.sub(r"\s{2,}", " ", txt).strip()
|
|
@@ -604,7 +604,7 @@ def generate_report_draft(buf, name: str, ctx: str, uid: str, project_id: str, b
|
|
| 604 |
logging.info(f"Generating autonomous report draft for project {project_id}")
|
| 605 |
|
| 606 |
df = load_dataframe_safely(buf, name)
|
| 607 |
-
llm = ChatGoogleGenerativeAI(model="gemini-2.… [line truncated in diff capture — original model string not recoverable]
|
| 608 |
|
| 609 |
# Build enhanced context with AI intelligence
|
| 610 |
ctx_dict = {"shape": df.shape, "columns": list(df.columns), "user_ctx": ctx}
|
|
@@ -965,7 +965,7 @@ def generate_single_chart(df: pd.DataFrame, description: str, uid: str, project_
|
|
| 965 |
|
| 966 |
def generate_video_from_project(df: pd.DataFrame, raw_md: str, uid: str, project_id: str, voice_model: str, bucket):
|
| 967 |
logging.info(f"Generating video for project {project_id} with voice {voice_model}")
|
| 968 |
-
llm = ChatGoogleGenerativeAI(model="gemini-2.… [line truncated in diff capture — original model string not recoverable]
|
| 969 |
story_prompt = f"Based on the following report, create a script for a {VIDEO_SCENES}-scene video. Each scene must be separated by '[SCENE_BREAK]' and contain narration and one chart tag. Report: {raw_md}. only output the script no quips"
|
| 970 |
script = llm.invoke(story_prompt).content
|
| 971 |
scenes = [s.strip() for s in script.split("[SCENE_BREAK]") if s.strip()]
|
|
|
|
| 45 |
def deepgram_tts(txt: str, voice_model: str):
|
| 46 |
DG_KEY = os.getenv("DEEPGRAM_API_KEY")
|
| 47 |
if not DG_KEY or not txt: return None
|
| 48 |
+
txt = re.sub(r"[^\w\s.,!?;:-]", "", txt)
|
| 49 |
try:
|
| 50 |
r = requests.post("https://api.deepgram.com/v1/speak", params={"model": voice_model}, headers={"Authorization": f"Token {DG_KEY}", "Content-Type": "application/json"}, json={"text": txt}, timeout=30)
|
| 51 |
r.raise_for_status()
|
|
|
|
| 69 |
# Matches "Scene 1:", "scene 2 -", etc. at the start of a line (case-insensitive).
re_scene = re.compile(r"^\s*scene\s*\d+[:.\- ]*", re.I | re.M)

def clean_narration(txt: str) -> str:
    """Return narration text with script artifacts removed.

    Strips chart tags (via the module-level TAG_RE), "Scene N" headers,
    leftover script boilerplate words, parenthetical asides, and markdown
    emphasis characters, then collapses repeated whitespace.
    """
    # Drop chart tags and scene headers first. TAG_RE is defined at
    # module level elsewhere in this file — assumed to match chart-tag
    # markup; confirm against its definition.
    txt = TAG_RE.sub("", txt)
    txt = re_scene.sub("", txt)

    # Remove leftover script boilerplate, case-insensitively.
    for leftover in (r"chart tag", r"chart_tag", r"narration"):
        txt = re.sub(leftover, "", txt, flags=re.IGNORECASE)

    # Strip parenthetical asides (with any preceding whitespace) and
    # markdown emphasis/heading characters.
    txt = re.sub(r"\s*\([^)]*\)", "", txt)
    txt = re.sub(r"[\*#_]", "", txt)

    # Collapse runs of whitespace into single spaces and trim the ends.
    return re.sub(r"\s{2,}", " ", txt).strip()
|
|
|
|
| 604 |
logging.info(f"Generating autonomous report draft for project {project_id}")
|
| 605 |
|
| 606 |
df = load_dataframe_safely(buf, name)
|
| 607 |
+
llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash", google_api_key=API_KEY, temperature=0.1)
|
| 608 |
|
| 609 |
# Build enhanced context with AI intelligence
|
| 610 |
ctx_dict = {"shape": df.shape, "columns": list(df.columns), "user_ctx": ctx}
|
|
|
|
| 965 |
|
| 966 |
def generate_video_from_project(df: pd.DataFrame, raw_md: str, uid: str, project_id: str, voice_model: str, bucket):
|
| 967 |
logging.info(f"Generating video for project {project_id} with voice {voice_model}")
|
| 968 |
+
llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash", google_api_key=API_KEY, temperature=0.2)
|
| 969 |
story_prompt = f"Based on the following report, create a script for a {VIDEO_SCENES}-scene video. Each scene must be separated by '[SCENE_BREAK]' and contain narration and one chart tag. Report: {raw_md}. only output the script no quips"
|
| 970 |
script = llm.invoke(story_prompt).content
|
| 971 |
scenes = [s.strip() for s in script.split("[SCENE_BREAK]") if s.strip()]
|