mockup_agent / agents /requirements_extractor.py
dina1's picture
Update agents/requirements_extractor.py
e416c9c verified
import os, json, re
from .utils_llm import call_llm
# Prompt template sent to the LLM by extract_requirements(). The only
# placeholder is {text}, filled in with str.format(); any literal braces added
# to this template later would have to be doubled ({{ }}) to survive format().
# The five top-level keys named here are the same ones extract_requirements()
# guarantees in its return value.
PROMPT = """
You are a senior business analyst.
Read the document text and output structured JSON for a PowerApps-style app.
Rules:
1️⃣ Output only JSON with these exact top-level keys:
entities, workflows, roles, actions, ui_notes
2️⃣ Do not add any text, comments, or markdown.
3️⃣ If information is missing, return empty lists.
Document:
{text}
"""
# API key forwarded as `api_key` to call_llm(). Read once at import time from
# the GEMINI_API_KEY_EXTRACTOR environment variable; os.getenv() yields None
# when the variable is unset.
GEMINI_KEY_EXTRACTOR = os.getenv("GEMINI_API_KEY_EXTRACTOR")
async def extract_requirements(text: str) -> dict:
    """Ask the LLM to extract structured requirements from document text.

    The first 15000 characters of ``text`` are substituted into ``PROMPT`` and
    sent through ``call_llm``. The reply is reduced to the span between its
    first ``{`` and last ``}`` (which drops surrounding chatter such as
    markdown fences) and parsed as JSON. Parsing is best-effort: a reply that
    is empty, has no usable brace pair, is not valid JSON, or does not decode
    to an object is treated as an empty result instead of raising.

    Args:
        text: Raw document text to analyse.

    Returns:
        A dict that always contains the keys ``entities``, ``workflows``,
        ``roles``, ``actions`` and ``ui_notes``, each mapped to a list. A key
        that is missing, or whose value is not a list, is replaced by ``[]``.
        Any additional keys present in the model's reply are kept as-is.
    """
    raw = await call_llm(
        PROMPT.format(text=text[:15000]),
        system="Return strictly valid JSON with top-level keys entities, workflows, roles, actions, ui_notes",
        api_key=GEMINI_KEY_EXTRACTOR,
        temperature=0.0,
    )

    # Normalise a None/empty reply to "" so the string operations below work.
    raw = (raw or "").strip()

    # Keep only the outermost {...} span. Compare the raw find/rfind results
    # (before adding 1) so the -1 "not found" sentinel is actually detected,
    # and require the closing brace to come after the opening one.
    start = raw.find("{")
    end = raw.rfind("}")

    data = {}
    if start != -1 and end > start:
        try:
            parsed = json.loads(raw[start:end + 1])
        except (ValueError, RecursionError):
            # ValueError covers json.JSONDecodeError (malformed JSON);
            # RecursionError covers pathologically nested replies. Either way
            # fall back to the empty schema rather than failing the caller.
            parsed = None
        if isinstance(parsed, dict):
            data = parsed

    # Guarantee the required keys exist and are lists.
    for key in ("entities", "workflows", "roles", "actions", "ui_notes"):
        if not isinstance(data.get(key), list):
            data[key] = []

    # Optional debug output, enabled with DEBUG_EXTRACTOR=1; only the first
    # 400 characters of the cleaned reply are printed.
    if os.getenv("DEBUG_EXTRACTOR") == "1":
        print("--- RAW EXTRACTOR OUTPUT ---")
        print(raw[:400])

    return data