Spaces:
Sleeping
Sleeping
| import os | |
| import json | |
| import logging | |
| from openai import AsyncOpenAI | |
| from typing import Dict, Any | |
| from dotenv import load_dotenv | |
| load_dotenv() | |
| logger = logging.getLogger(__name__) | |
| from app.url_utils import openai_compatible_api_key, sanitize_env | |
| from app.llm_client import get_llm_client | |
| class ParserAgent: | |
| def __init__(self): | |
| self.llm = get_llm_client() | |
| async def process(self, text: str, feedback: str = None, context: Dict[str, Any] = None) -> Dict[str, Any]: | |
| logger.info(f"==[ParserAgent] Processing input (len={len(text)})==") | |
| if feedback: | |
| logger.warning(f"[ParserAgent] Feedback from previous attempt: {feedback}") | |
| if context: | |
| logger.info(f"[ParserAgent] Using previous context (dsl_len={len(context.get('geometry_dsl', ''))})") | |
| system_prompt = """ | |
| You are a Geometry Parser Agent. Extract geometric entities and constraints from Vietnamese/LaTeX math problem text. | |
| === CONTEXT AWARENESS === | |
| If previous context is provided, it means this is a follow-up request. | |
| - Combine old entities with new ones. | |
| - Update 'analysis' to reflect the entire problem state. | |
| Output ONLY a JSON object with this EXACT structure (no extra keys, no markdown): | |
| { | |
| "entities": ["Point A", "Point B", ...], | |
| "type": "pyramid|prism|sphere|rectangle|triangle|circle|parallelogram|trapezoid|square|rhombus|general", | |
| "values": {"AB": 5, "SO": 15, "radius": 3}, | |
| "target_question": "Câu hỏi cụ thể cần giải (ví dụ: 'Tính diện tích tam giác ABC'). NẾU KHÔNG CÓ CÂU HỎI THÌ ĐỂ null.", | |
| "analysis": "Tóm tắt ngắn gọn toàn bộ bài toán sau khi đã cập nhật các yêu cầu mới bằng tiếng Việt." | |
| } | |
| Rules: | |
| - "analysis" MUST be a meaningful and UP-TO-DATE summary of the problem in Vietnamese. | |
| - "target_question" must be concise. | |
| - Include midpoints, auxiliary points in "entities" if mentioned. | |
| - If feedback is provided, correct your previous output accordingly. | |
| """ | |
| user_content = f"Text: {text}" | |
| if context: | |
| user_content = f"PREVIOUS ANALYSIS: {context.get('analysis')}\nNEW REQUEST: {text}" | |
| if feedback: | |
| user_content += f"\nFeedback from previous attempt: {feedback}. Please correct the constraints." | |
| logger.debug("[ParserAgent] Calling LLM (Multi-Layer)...") | |
| raw = await self.llm.chat_completions_create( | |
| messages=[ | |
| {"role": "system", "content": system_prompt}, | |
| {"role": "user", "content": user_content} | |
| ], | |
| response_format={"type": "json_object"} | |
| ) | |
| # Pre-process raw string: extract the JSON block if present | |
| import re | |
| clean_raw = raw.strip() | |
| # Handle potential markdown code blocks | |
| if clean_raw.startswith("```"): | |
| import re | |
| match = re.search(r"```(?:json)?\s*(.*?)\s*```", clean_raw, re.DOTALL) | |
| if match: | |
| clean_raw = match.group(1).strip() | |
| try: | |
| result = json.loads(clean_raw) | |
| except json.JSONDecodeError as e: | |
| logger.error(f"[ParserAgent] JSON Parse Error: {e}. Attempting regex fallback...") | |
| import re | |
| json_match = re.search(r'(\{.*\})', clean_raw, re.DOTALL) | |
| if json_match: | |
| try: | |
| # Handle single quotes if present (common LLM failure) | |
| json_str = json_match.group(1) | |
| if "'" in json_str and '"' not in json_str: | |
| json_str = json_str.replace("'", '"') | |
| result = json.loads(json_str) | |
| except: | |
| result = None | |
| else: | |
| result = None | |
| if not result: | |
| # Fallback for critical failure | |
| result = { | |
| "entities": [], | |
| "type": "general", | |
| "values": {}, | |
| "target_question": None, | |
| "analysis": text | |
| } | |
| logger.info(f"[ParserAgent] LLM response received.") | |
| logger.debug(f"[ParserAgent] Parsed JSON: {json.dumps(result, ensure_ascii=False, indent=2)}") | |
| return result | |