Spaces:
Sleeping
Sleeping
| # utils/parser_utils.py | |
| import re | |
| import json | |
| from typing import List, Dict, Any, Union | |
def extract_json_from_llm_response(
    response_text: str,
) -> Union[List[Dict[str, Any]], Dict[str, Any]]:
    """Extract and parse the JSON payload embedded in an LLM response.

    Two strategies are tried, in order:

    1. A fenced Markdown block tagged ``json`` (the tag is matched
       case-insensitively). The text between the fences is parsed as a whole
       with ``json.loads``.
    2. If no such fence exists, the first ``{`` or ``[`` in the response is
       located and exactly one JSON value is decoded starting there. Any text
       after that value (prose, further brackets, closing fences) is ignored.

    Args:
        response_text: Raw text returned by the LLM.

    Returns:
        The decoded JSON value (a list or a dict in the fallback path).

    Raises:
        ValueError: If no JSON block can be located, or if the located block
            is not valid JSON. The underlying ``json.JSONDecodeError`` is
            attached as ``__cause__`` in the latter case.
    """
    # 1. Prefer an explicit ```json ... ``` fence. The pattern uses [\s\S]
    #    instead of '.', so it spans newlines without needing re.DOTALL.
    fenced = re.search(
        r'```json\s*([\s\S]*?)\s*```',
        response_text,
        re.IGNORECASE,
    )
    if fenced:
        json_str = fenced.group(1).strip()
        try:
            return json.loads(json_str)
        except json.JSONDecodeError as e:
            raise ValueError(
                f"JSON 파싱에 실패했습니다: {e}. (추출된 문자열: {json_str[:150]}...)"
            ) from e

    # 2. No fence: look for the first opening bracket. The character class
    #    deliberately contains only '{' and '[' -- a '|' inside [...] is a
    #    literal pipe, not alternation, and would hijack the search.
    opener = re.search(r'[{\[]', response_text)
    if opener is None:
        raise ValueError(
            f"응답에서 JSON 블록을 찾지 못했습니다. (응답 시작: {response_text[:150]}...)"
        )

    start_index = opener.start()
    try:
        # raw_decode parses exactly one JSON value starting at start_index and
        # stops at its end, so trailing text that happens to contain ']' or
        # '}' cannot corrupt the extracted payload (a greedy regex would
        # stretch to the last closing bracket in the response).
        parsed, _end = json.JSONDecoder().raw_decode(response_text, start_index)
    except json.JSONDecodeError as e:
        json_str = response_text[start_index:]
        raise ValueError(
            f"JSON 파싱에 실패했습니다: {e}. (추출된 문자열: {json_str[:150]}...)"
        ) from e
    return parsed