Spaces:
Sleeping
Sleeping
| import os, json, re | |
| from .utils_llm import call_llm | |
# Prompt template sent to the LLM by extract_requirements(). It is filled in
# with str.format(text=...), so `{text}` is the only placeholder; any literal
# brace added to this template later must be doubled (`{{` / `}}`).
| PROMPT = """ | |
| You are a senior business analyst. | |
| Read the document text and output structured JSON for a PowerApps-style app. | |
| Rules: | |
| 1️⃣ Output only JSON with these exact top-level keys: | |
| entities, workflows, roles, actions, ui_notes | |
| 2️⃣ Do not add any text, comments, or markdown. | |
| 3️⃣ If information is missing, return empty lists. | |
| Document: | |
| {text} | |
| """ | |
# API key handed to call_llm() as `api_key`. Read once at import time from the
# GEMINI_API_KEY_EXTRACTOR environment variable; os.getenv returns None when
# the variable is unset.
| GEMINI_KEY_EXTRACTOR = os.getenv("GEMINI_API_KEY_EXTRACTOR") | |
# Top-level keys every extractor result is guaranteed to carry (each a list).
_REQUIRED_KEYS = ("entities", "workflows", "roles", "actions", "ui_notes")

# Only this many leading characters of the document are sent to the model.
_MAX_DOCUMENT_CHARS = 15000


def _parse_requirements_json(raw: str) -> dict:
    """Parse the model reply into a dict that always has the required list keys.

    The reply is trimmed to the span between the first ``{`` and the last
    ``}`` so that surrounding chatter or markdown fences are ignored. If no
    such span exists, or the span is not valid JSON, an empty schema is used.
    Keys other than the required ones are passed through unchanged.
    """
    start = raw.find("{")
    end = raw.rfind("}")  # keep the raw index so the -1 sentinel stays testable

    data = {}
    if start != -1 and end > start:
        try:
            # The candidate starts with "{", so a successful parse is a dict.
            data = json.loads(raw[start:end + 1])
        except (ValueError, RecursionError):
            # Malformed (JSONDecodeError is a ValueError) or pathologically
            # nested output: fall back to the empty schema, best effort.
            data = {}

    for key in _REQUIRED_KEYS:
        # Missing keys and keys with a non-list value are both reset.
        if not isinstance(data.get(key), list):
            data[key] = []
    return data


async def extract_requirements(text: str) -> dict:
    """Ask the LLM to turn document text into a structured requirements dict.

    Args:
        text: Document text. Only the first 15000 characters are sent;
            ``None`` is treated as an empty document.

    Returns:
        A dict containing at least the keys ``entities``, ``workflows``,
        ``roles``, ``actions`` and ``ui_notes``, each mapped to a list. When
        the model reply cannot be parsed as a JSON object, every one of those
        lists is empty.

    Errors raised by ``call_llm`` itself are not caught here.

    Setting the environment variable ``DEBUG_EXTRACTOR=1`` prints the first
    400 characters of the stripped model reply to stdout.
    """
    document = (text or "")[:_MAX_DOCUMENT_CHARS]
    raw = await call_llm(
        PROMPT.format(text=document),
        system="Return strictly valid JSON with top-level keys entities, workflows, roles, actions, ui_notes",
        api_key=GEMINI_KEY_EXTRACTOR,
        temperature=0.0,
    )
    raw = (raw or "").strip()

    # Optional debug output, emitted before parsing so it is available even
    # if the parse step misbehaves.
    if os.getenv("DEBUG_EXTRACTOR") == "1":
        print("--- RAW EXTRACTOR OUTPUT ---")
        print(raw[:400])

    return _parse_requirements_json(raw)