Spaces:
Sleeping
Sleeping
fix: add dotenv loading, enrichment retry logic, and graceful error handling
Browse files
- Load .env automatically via python-dotenv in config.py
- Retry Gemini calls up to 3 times with exponential backoff on empty responses
- Skip failed enrichments in CLI instead of crashing the full pipeline
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
- cli.py +12 -9
- src/mediastorm/config.py +4 -0
- src/mediastorm/enrich/enricher.py +16 -8
cli.py
CHANGED
|
@@ -76,15 +76,18 @@ def enrich(cache_dir: str, enrichment_dir: str):
|
|
| 76 |
for c in story.get("credits", [])
|
| 77 |
if c.get("name")
|
| 78 |
)
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
|
|
|
|
|
|
|
|
|
| 88 |
|
| 89 |
asyncio.run(_run())
|
| 90 |
click.echo("Enrichment complete.")
|
|
|
|
| 76 |
for c in story.get("credits", [])
|
| 77 |
if c.get("name")
|
| 78 |
)
|
| 79 |
+
try:
|
| 80 |
+
result = await _enrich(
|
| 81 |
+
title=story.get("name", ""),
|
| 82 |
+
description=description.strip(),
|
| 83 |
+
transcript=transcript.strip(),
|
| 84 |
+
cache_dir=enrichment_path,
|
| 85 |
+
story_uid=uid,
|
| 86 |
+
credits=credits_str,
|
| 87 |
+
)
|
| 88 |
+
click.echo(f" [{i+1}/{len(files)}] {story.get('name', uid)} — {len(result.topics)} topics")
|
| 89 |
+
except Exception as e:
|
| 90 |
+
click.echo(f" [{i+1}/{len(files)}] FAIL {story.get('name', uid)}: {e}")
|
| 91 |
|
| 92 |
asyncio.run(_run())
|
| 93 |
click.echo("Enrichment complete.")
|
src/mediastorm/config.py
CHANGED
|
@@ -1,6 +1,10 @@
|
|
| 1 |
import os
|
| 2 |
from pathlib import Path
|
| 3 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "")
|
| 5 |
HF_TOKEN = os.environ.get("HF_TOKEN", "")
|
| 6 |
CHROMADB_PATH = Path(os.environ.get("CHROMADB_PATH", "./data/chromadb"))
|
|
|
|
| 1 |
import os
|
| 2 |
from pathlib import Path
|
| 3 |
|
| 4 |
+
from dotenv import load_dotenv
|
| 5 |
+
|
| 6 |
+
load_dotenv()
|
| 7 |
+
|
| 8 |
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "")
|
| 9 |
HF_TOKEN = os.environ.get("HF_TOKEN", "")
|
| 10 |
CHROMADB_PATH = Path(os.environ.get("CHROMADB_PATH", "./data/chromadb"))
|
src/mediastorm/enrich/enricher.py
CHANGED
|
@@ -89,15 +89,23 @@ async def enrich_story(
|
|
| 89 |
themes=THEMES_LIST,
|
| 90 |
)
|
| 91 |
|
|
|
|
|
|
|
| 92 |
client = _get_client()
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
|
| 102 |
raw = json.loads(response.text)
|
| 103 |
enrichment = StoryEnrichment.model_validate(raw)
|
|
|
|
| 89 |
themes=THEMES_LIST,
|
| 90 |
)
|
| 91 |
|
| 92 |
+
import asyncio as _aio
|
| 93 |
+
|
| 94 |
client = _get_client()
|
| 95 |
+
for attempt in range(3):
|
| 96 |
+
response = await client.aio.models.generate_content(
|
| 97 |
+
model=GEMINI_FLASH_MODEL,
|
| 98 |
+
contents=prompt,
|
| 99 |
+
config=GenerateContentConfig(
|
| 100 |
+
response_mime_type="application/json",
|
| 101 |
+
temperature=0.2,
|
| 102 |
+
),
|
| 103 |
+
)
|
| 104 |
+
if response.text:
|
| 105 |
+
break
|
| 106 |
+
await _aio.sleep(2 ** attempt)
|
| 107 |
+
else:
|
| 108 |
+
raise ValueError(f"Gemini returned empty response after 3 attempts")
|
| 109 |
|
| 110 |
raw = json.loads(response.text)
|
| 111 |
enrichment = StoryEnrichment.model_validate(raw)
|