Spaces:
Sleeping
Sleeping
"""
Evaluation utilities backed by an LLM.
"""
| import json | |
| import os | |
| from pathlib import Path | |
| from typing import Dict, Any, Optional | |
| from openai import OpenAI | |
| from dotenv import load_dotenv | |
# Load the .env file relative to this module's directory (assumed to be the
# project root — TODO confirm if this file ever moves into a subpackage) so
# the key is found regardless of the current working directory (cwd).
# Guarded so that importing this module does not fail in restricted
# environments (e.g. sandboxes) where the file cannot be read.
try:
    project_root = Path(__file__).resolve().parent
    dotenv_path = project_root / ".env"
    # override=True: values from .env win even when the variable is already
    # present in the environment (e.g. a placeholder test key).
    load_dotenv(dotenv_path=dotenv_path, override=True)
except OSError:
    # PermissionError is a subclass of OSError, so a single clause covers
    # both cases from the original tuple. If .env is unreadable,
    # OPENAI_API_KEY may still be supplied via the process environment.
    pass
class LLMEvaluator:
    """Evaluator backed by OpenAI GPT-4o.

    Wraps the chat-completions API to score arbitrary data against a
    caller-supplied rubric (`evaluate`) and to re-review an initial
    score (`review_evaluation`). Both methods always return a dict,
    falling back to an error dictionary instead of raising on API or
    parsing failures.
    """

    def __init__(self, api_key: Optional[str] = None):
        """
        Args:
            api_key: OpenAI API key. Falls back to the OPENAI_API_KEY
                environment variable when None.

        Raises:
            ValueError: If no API key can be resolved from either source.
        """
        self.api_key = api_key or os.getenv("OPENAI_API_KEY")
        if not self.api_key:
            raise ValueError(
                "OPENAI_API_KEY๊ฐ ์ค์ ๋์ง ์์์ต๋๋ค. "
                "ํ๋ก์ ํธ ๋ฃจํธ์ .env ํ์ผ ๋๋ ํ๊ฒฝ๋ณ์ OPENAI_API_KEY๋ฅผ ํ์ธํ์ธ์."
            )
        self.client = OpenAI(api_key=self.api_key)
        # Fixed model; change here if a different snapshot is required.
        self.model = "gpt-4o"

    def evaluate(
        self,
        data: str,
        criteria: str,
        max_retries: int = 3
    ) -> Dict[str, Any]:
        """
        Evaluate the given data against the supplied criteria.

        Args:
            data: Data to evaluate (JSON string or plain text).
            criteria: Evaluation rubric prepended to the prompt.
            max_retries: Maximum attempts when the model's reply is not
                valid JSON (each retry re-issues the API call).

        Returns:
            The parsed evaluation dict on success; otherwise an error
            dict containing "error" and a zero total-score key.
        """
        prompt = f"{criteria}\n\n## ํ๊ฐ ๋์ ๋ฐ์ดํฐ:\n{data}\n\n์ ๋ฐ์ดํฐ๋ฅผ ์ฌ์ฌ ๊ธฐ์ค์ ๋ฐ๋ผ **์๊ฒฉํ๊ฒ** ํ๊ฐํ๊ณ , ๋ฐ๋์ JSON ํ์์ผ๋ก๋ง ์๋ตํ์ญ์์ค. ์ ์๋ฅผ ํํ๊ฒ ์ฃผ์ง ๋ง์๊ณ , ๊ธฐ์ค์ ์ ํํ ์ถฉ์กฑํ์ง ์์ผ๋ฉด ๊ฐ์ ํ์ญ์์ค."
        result = None  # last raw response text, kept for error reporting
        for attempt in range(max_retries):
            try:
                response = self.client.chat.completions.create(
                    model=self.model,
                    messages=[
                        {
                            "role": "system",
                            "content": """๋น์ ์ n8n ์ํฌํ๋ก์ฐ ๋ฐ ํ๋ก์ ํธ๋ฅผ ํ๊ฐํ๋ ์ ๋ฌธ ์ฌ์ฌ์์์ ๋๋ค.
โ ๏ธ **์๊ฒฉํ ํ๊ฐ ์์น์ ๋ฐ๋์ ์ค์ํ์ญ์์ค:**
- ์ ์๋ ๋งค์ฐ ์๊ฒฉํ๊ฒ ๋ถ์ฌํ์ญ์์ค. ๊ธฐ์ค์ ์ ํํ ์ถฉ์กฑํ์ง ์์ผ๋ฉด ๊ฐ์ ํ์ญ์์ค.
- "๊ฑฐ์ ์์ฑ" ๋๋ "๋๋ถ๋ถ ์ข์"์ ์ถฉ๋ถํ์ง ์์ต๋๋ค. ์๋ฒฝํ๊ฒ ์ถฉ์กฑํด์ผ๋ง ๋ง์ ์ ๋ถ์ฌํ์ญ์์ค.
- ๋ถํ์คํ๊ฑฐ๋ ๋ช ํํ์ง ์์ ๊ฒฝ์ฐ๋ ๋ณด์์ ์ผ๋ก ๋ฎ์ ์ ์๋ฅผ ๋ถ์ฌํ์ญ์์ค.
- ๊ธฐ์ค์ ์์ ํ ์ถฉ์กฑํ์ง ์์ผ๋ฉด ํด๋น ํญ๋ชฉ์ ๋ํ ์ ์๋ฅผ ๋ถ์ฌํ์ง ๋ง์ญ์์ค.
- ์ ์๋ฅผ ํํ๊ฒ ์ฃผ์ง ๋ง์ญ์์ค. ์๊ฒฉํ๊ณ ๊ณต์ ํ๊ฒ ํ๊ฐํ์ญ์์ค.
ํญ์ JSON ํ์์ผ๋ก๋ง ์๋ตํ์ญ์์ค."""
                        },
                        {
                            "role": "user",
                            "content": prompt
                        }
                    ],
                    temperature=0.0,  # low temperature for scoring consistency
                    response_format={"type": "json_object"}  # force JSON reply
                )
                # content is Optional in the SDK (e.g. refusals); coerce
                # None to "" so the failure is handled by the JSON-retry
                # path below instead of raising TypeError in json.loads.
                result = response.choices[0].message.content or ""
                parsed_result = json.loads(result)
                return parsed_result
            except json.JSONDecodeError as e:
                if attempt < max_retries - 1:
                    print(f"JSON ํ์ฑ ์คํจ (์๋ {attempt + 1}/{max_retries}), ์ฌ์๋ ์ค...")
                    continue
                else:
                    print(f"JSON ํ์ฑ ์ต์ข ์คํจ: {e}")
                    return {
                        "error": "JSON ํ์ฑ ์คํจ",
                        "raw_response": result,
                        "์ด์ ": 0
                    }
            except Exception as e:
                # Best-effort contract: API/transport failures are reported
                # as an error dict rather than raised to the caller.
                print(f"API ํธ์ถ ์คํจ: {e}")
                return {
                    "error": str(e),
                    "์ด์ ": 0
                }
        # Defensive fallback; every loop path above already returns.
        return {"error": "ํ๊ฐ ์คํจ", "์ด์ ": 0}

    def review_evaluation(
        self,
        data: str,
        criteria: str,
        initial_score: Dict[str, Any]
    ) -> Dict[str, Any]:
        """
        Re-review an initial evaluation result.

        Args:
            data: The original data that was evaluated.
            criteria: The rubric used for the initial evaluation.
            initial_score: The initial evaluation result to re-check.

        Returns:
            The parsed review dict on success; on any failure, a dict
            that keeps the initial score unchanged and records the error.
        """
        # Local import avoids a circular dependency with the criteria module.
        from evaluation_criteria import REVIEW_CRITERIA
        review_prompt = REVIEW_CRITERIA.format(
            data=data,
            criteria=criteria,
            initial_score=json.dumps(initial_score, ensure_ascii=False, indent=2)
        )
        try:
            response = self.client.chat.completions.create(
                model=self.model,
                messages=[
                    {
                        "role": "system",
                        "content": """
You are an expert workflow evaluator specializing in n8n JSON workflows, LLM-based automation, and Upstage API utilization analysis.
Your job is to strictly evaluate the submitted workflow and project description using the provided scoring rubrics.
You must never hallucinate, never assume the existence of missing nodes, and evaluate solely based on the JSON/text that is explicitly given.
โ ๏ธ **CRITICAL: Strict Scoring Principles**
- Score very strictly. Do not give points unless criteria are perfectly met.
- "Almost complete" or "mostly good" is NOT sufficient. Only award points when criteria are fully satisfied.
- When uncertain or unclear, give conservative low scores.
- Do not award points for items that do not fully meet the criteria.
- Do NOT be generous with scores. Evaluate strictly and fairly.
- Be conservative and rigorous in your evaluation.
"""
                    },
                    {
                        "role": "user",
                        "content": review_prompt
                    }
                ],
                temperature=0.2,
                response_format={"type": "json_object"}
            )
            # Same Optional-content guard as evaluate(): None -> "" so the
            # error surfaces as JSONDecodeError handled below.
            result = response.choices[0].message.content or ""
            parsed_result = json.loads(result)
            return parsed_result
        except Exception as e:
            # On any failure, keep the initial score and note the error.
            print(f"์ฌ๊ฒํ ์คํจ: {e}")
            return {
                "์ฌ๊ฒํ _๊ฒฐ๊ณผ": "์ ์ง",
                "์ต์ข _์ ์": initial_score,
                "์ฌ๊ฒํ _์๊ฒฌ": f"์ฌ๊ฒํ ์ค ์ค๋ฅ ๋ฐ์: {str(e)}"
            }
def safe_json_load(file_path: str) -> Optional[Dict[str, Any]]:
    """
    Load a JSON file defensively.

    Args:
        file_path: Path to the JSON file.

    Returns:
        The parsed JSON object, or None when the file is missing,
        unreadable, or does not contain valid JSON.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            parsed = json.load(handle)
    except json.JSONDecodeError:
        # Malformed JSON is an expected condition: fail silently.
        return None
    except Exception as e:
        # Any other problem (missing file, permissions, ...) is reported
        # but still mapped to None per the best-effort contract.
        print(f"ํ์ผ ๋ก๋ ์ค๋ฅ: {e}")
        return None
    return parsed