Spaces:
Sleeping
Sleeping
"""VELA Research Agent - Gradio Web Demo.

Gradio demo for deployment on HuggingFace Spaces.
Runs the VELA 7B model on a ZeroGPU backend (requires HF Pro).

Deploying on HuggingFace Spaces:
1. In the Space settings pick SDK "gradio" and Hardware "ZeroGPU".
2. (Optional) add search API keys as Secrets:
   - NAVER_CLIENT_ID_1, NAVER_CLIENT_SECRET_1
3. The GPU is allocated automatically via the @spaces.GPU decorator.
"""
import json
import logging
import os
import time
import traceback

import gradio as gr
from dotenv import load_dotenv

# Load a local .env so dev runs see the same secrets as Spaces Secrets.
load_dotenv()

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
)
logger = logging.getLogger(__name__)
def get_backend() -> str:
    """Auto-select the LLM backend from the runtime environment.

    Priority order:
    1. Explicit ``VELA_LLM_BACKEND`` override (non-empty).
    2. ``SPACE_ID`` set -> running on HF Spaces -> ``"zerogpu"``.
    3. ``RUNPOD_API_KEY`` set -> ``"runpod"``.
    4. Default: ``"zerogpu"``.
    """
    if override := os.environ.get("VELA_LLM_BACKEND"):
        return override
    if os.environ.get("SPACE_ID"):
        return "zerogpu"
    return "runpod" if os.environ.get("RUNPOD_API_KEY") else "zerogpu"
| def _is_zerogpu_quota_error(e: Exception) -> bool: | |
| """ZeroGPU ์ฟผํฐ/ํ ๋น ์ค๋ฅ ์ฌ๋ถ ํ๋ณ""" | |
| msg = str(e).lower() | |
| return any(kw in msg for kw in ( | |
| "quota", "zerogpu", "out of gpu", "no gpu", "gpu quota", | |
| "exceeded", "gpu not available", "not enough gpu", | |
| )) | |
| def _runpod_available() -> bool: | |
| """RunPod Serverless ํ๊ฒฝ๋ณ์ ์ค์ ์ฌ๋ถ ํ์ธ""" | |
| return bool(os.environ.get("RUNPOD_API_KEY") and os.environ.get("RUNPOD_ENDPOINT_ID")) | |
# Resolve the backend once at import time; all later code reads BACKEND.
BACKEND = get_backend()
logger.info(f"LLM ๋ฐฑ์๋: {BACKEND}")

# ZeroGPU: wrap the ENTIRE research run in one @spaces.GPU(duration=300)
# context. Decorating each _generate() call separately would make the
# second GPU allocation within the same request fail.
_has_spaces = False
if BACKEND == "zerogpu":
    import vela.tools.zerogpu_client  # noqa: F401 - side effect: preload the model
    try:
        import spaces
        _has_spaces = True
    except ImportError:
        # Not actually running on HF Spaces (e.g. local dev):
        # fall back to the undecorated code path defined below.
        pass
if _has_spaces:
    # BUG FIX: the module comment above and this function's docstring state
    # that the whole research runs inside a single @spaces.GPU(duration=300)
    # context, but the decorator was never applied — so no GPU was ever
    # requested on ZeroGPU Spaces. Apply it here, once, on the entry point.
    @spaces.GPU(duration=300)
    def _run_research_gpu(query: str, max_iterations: int):
        """Run the full research inside one GPU context (single allocation).

        ZeroGPU pickles arguments via multiprocessing, so unpicklable
        objects (ResearchAgent, callbacks, ...) must be created INSIDE
        this function. Only basic types (str, int, ...) may be passed in.
        """
        from vela import ResearchAgent
        from vela.schemas import ResearchOptions

        agent = ResearchAgent(llm_backend="zerogpu")
        options = ResearchOptions(max_iterations=max_iterations, extract_content=True)
        return agent.research(query=query, options=options)
else:
    def _run_research_gpu(query: str, max_iterations: int):
        """Fallback without the @spaces.GPU decorator (non-Spaces runtime)."""
        from vela import ResearchAgent
        from vela.schemas import ResearchOptions

        agent = ResearchAgent(llm_backend=BACKEND)
        options = ResearchOptions(max_iterations=max_iterations, extract_content=True)
        return agent.research(query=query, options=options)
def _run_research_runpod(query: str, max_iterations: int):
    """RunPod Serverless fallback (no GPU decorator needed)."""
    from vela import ResearchAgent
    from vela.schemas import ResearchOptions

    runpod_agent = ResearchAgent(llm_backend="runpod")
    opts = ResearchOptions(max_iterations=max_iterations, extract_content=True)
    return runpod_agent.research(query=query, options=opts)
| def run_research(query: str, max_iterations: int): | |
| """๋ฆฌ์์น ์คํ โ ์คํธ๋ฆฌ๋ฐ ์ ๋๋ ์ดํฐ. | |
| ZeroGPU: ์ ์ฒด research๋ฅผ ๋จ์ผ @spaces.GPU(duration=300) ์ปจํ ์คํธ๋ก ์คํ. | |
| ๋์ผ Gradio ์์ฒญ ๋ด ๋ค์ค @spaces.GPU ํธ์ถ ์ ๋ ๋ฒ์งธ๋ถํฐ GPU ํ ๋น ์คํจํ๋ฏ๋ก | |
| _run_research_gpu()์์ ํ ๋ฒ๋ง GPU๋ฅผ ํ ๋นํ๊ณ ๋ชจ๋ LLM ์ถ๋ก ์ ์ํ. | |
| """ | |
| if not query or not query.strip(): | |
| yield "์ฟผ๋ฆฌ๋ฅผ ์ ๋ ฅํด์ฃผ์ธ์.", "", "" | |
| return | |
| try: | |
| # ์ฒซ ๋ฒ์งธ yield: ์งํ ์ํฉ ์ด๊ธฐํ (UI ์ฆ์ ๋ฐ์) | |
| progress_lines = [f"## ๋ฆฌ์์น ์งํ ์ค: {query.strip()}\n"] | |
| yield "\n".join(progress_lines), "", "" | |
| # ๋จ์ผ GPU ์ปจํ ์คํธ์์ ์ ์ฒด research ์คํ | |
| # ZeroGPU pickle ์ ์ฝ: agent, callback ๋ฑ์ _run_research_gpu ๋ด๋ถ์์ ์์ฑ | |
| result = None | |
| try: | |
| result = _run_research_gpu(query.strip(), int(max_iterations)) | |
| except Exception as gpu_err: | |
| if _is_zerogpu_quota_error(gpu_err) and _runpod_available(): | |
| logger.warning(f"ZeroGPU ์ฟผํฐ ์์ง, RunPod Serverless๋ก ์ ํ: {gpu_err}") | |
| yield ( | |
| f"## ๋ฆฌ์์น ์งํ ์ค: {query.strip()}\n\n" | |
| f"> โ ๏ธ ZeroGPU ์ฟผํฐ ์ด๊ณผ โ RunPod Serverless๋ก ์ ํํฉ๋๋ค...\n", | |
| "", | |
| "", | |
| ) | |
| result = _run_research_runpod(query.strip(), int(max_iterations)) | |
| else: | |
| raise | |
| if not result: | |
| yield "๋ฆฌ์์น ๊ฒฐ๊ณผ๊ฐ ์์ต๋๋ค.", "", "" | |
| return | |
| # 1) ๋งํฌ๋ค์ด ๋ฆฌํฌํธ | |
| markdown_report = result.to_markdown() | |
| # 2) ์ถ๋ก ๊ณผ์ | |
| reasoning_lines = [] | |
| for s in result.reasoning_trace: | |
| reasoning_lines.append(f"### Step {s.step_number}") | |
| reasoning_lines.append(f"**Thought**: {s.thought}") | |
| reasoning_lines.append(f"**Action**: {s.action}") | |
| if s.query: | |
| reasoning_lines.append(f"**Query**: `{s.query}`") | |
| reasoning_lines.append(f"**Observation**: {s.observation}") | |
| reasoning_lines.append(f"**Confidence**: {s.confidence:.0%}") | |
| reasoning_lines.append("") | |
| reasoning_md = "\n".join(reasoning_lines) if reasoning_lines else "์ถ๋ก ๊ณผ์ ์์" | |
| # 3) Raw JSON | |
| raw_json = json.dumps(result.to_dict(), ensure_ascii=False, indent=2) | |
| yield markdown_report, reasoning_md, raw_json | |
| except Exception as e: | |
| logger.error(f"๋ฆฌ์์น ์คํจ: {e}") | |
| error_md = ( | |
| f"## ์ค๋ฅ ๋ฐ์\n\n" | |
| f"```\n{type(e).__name__}: {e}\n```\n\n" | |
| f"<details><summary>Traceback</summary>\n\n" | |
| f"```\n{traceback.format_exc()}\n```\n\n" | |
| f"</details>" | |
| ) | |
| yield error_md, "", "" | |
# ============================================================================
# Gradio UI
# ============================================================================

# Sample queries shown under the inputs: (query, max_iterations) pairs,
# matching the order of [query_input, max_iter_slider].
EXAMPLES = [
    ["SKํ์ด๋์ค HBM ์์ฅ ์ ๋ง", 3],
    ["์ผ์ฑ์ ์ ํ์ด๋๋ฆฌ ๊ฒฝ์๋ ฅ ๋ถ์", 3],
    ["๋ค์ด๋ฒ AI ์ฌ์ ์ ๋ต", 3],
    ["ํ๋์ฐจ ์ ๊ธฐ์ฐจ ์์ฅ ์ ์ ์จ", 3],
]
# Build the Gradio app: header, inputs, streamed outputs, examples,
# limitations accordion, and event bindings.
with gr.Blocks(title="VELA Research Agent", theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        "# VELA Research Agent Demo\n"
        "*Korean Financial Research with 7B LLM*\n\n"
        "VELA๋ ํ๊ตญ ์ฃผ์์์ฅ ์ ๋ฌธ ๋ฆฌ์์น ์์ด์ ํธ์ ๋๋ค. "
        "Chain-of-Thought ์ถ๋ก ์ผ๋ก ์น ๊ฒ์, ๋ถ์, ๊ฒฐ๋ก ๋์ถ์ ์๋ ์ํํฉ๋๋ค."
    )
    # Inputs: wide query textbox next to a narrow iteration slider.
    with gr.Row():
        with gr.Column(scale=3):
            query_input = gr.Textbox(
                label="๋ฆฌ์์น ์ฟผ๋ฆฌ",
                placeholder="์: SKํ์ด๋์ค HBM ์์ฅ ์ ๋ง",
                lines=1,
            )
        with gr.Column(scale=1):
            max_iter_slider = gr.Slider(
                minimum=1, maximum=5, value=3, step=1,
                label="์ต๋ ๋ฐ๋ณต",
            )
    run_btn = gr.Button("๋ฆฌ์์น ์คํ", variant="primary", size="lg")
    # Results area: main report plus collapsible trace / raw-JSON panels.
    report_output = gr.Markdown(label="๋ฆฌ์์น ๊ฒฐ๊ณผ")
    with gr.Accordion("์ถ๋ก ๊ณผ์ (Reasoning Trace)", open=False):
        reasoning_output = gr.Markdown()
    with gr.Accordion("Raw JSON", open=False):
        json_output = gr.Code(language="json")
    # Example queries
    gr.Examples(
        examples=EXAMPLES,
        inputs=[query_input, max_iter_slider],
        label="์์ ์ฟผ๋ฆฌ",
    )
    # Limitations
    with gr.Accordion("Limitations", open=False):
        gr.Markdown(
            "### Known Limitations\n\n"
            "*์ด ๋ฐ๋ชจ๋ ๊ณต๊ฐ ๊ฒ์ API + ๋ค์ด๋ฒ ์ฆ๊ถ ๋ฐ์ดํฐ๋ฅผ ์ฌ์ฉํฉ๋๋ค.*\n\n"
            "| ํญ๋ชฉ | ์ค๋ช | ์์ฉ ๋ฐฐํฌ |\n"
            "|------|------|----------|\n"
            "| **๋ชจ๋ธ ํฌ๊ธฐ** | 7B ํ๋ผ๋ฏธํฐ โ ๋ณต์กํ ๋ค๋จ๊ณ ์ถ๋ก ์ ๋ํ ๋ชจ๋ธ ๋๋น ํ์ง ์ ํ ๊ฐ๋ฅ | |\n"
            "| **์ธ์ด** | ํ๊ตญ ๊ธ์ต ๋๋ฉ์ธ ์ ์ฉ โ ์์ด/๋ค๊ตญ์ด ์ฟผ๋ฆฌ๋ ํ์ง ์ ํ | |\n"
            "| **์์ธ/๋ฐธ๋ฅ์์ด์ ** | ๋ค์ด๋ฒ ์ฆ๊ถ ์ค์๊ฐ ์ฐ๋ (PER/PBR/EPS/์๊ธ) | FnGuide ์ถ๊ฐ ๊ฐ๋ฅ |\n"
            "| **๊ฒ์ ๋ฒ์** | Naver + DuckDuckGo โ ์ ๋ฃ DB ์ ๊ทผ ๋ถ๊ฐ | ์ฆ๊ถ์ฌ ๋ฆฌํฌํธ ์ฐ๋ |\n"
            "| **์ฝํ ์ธ ์ถ์ถ** | ๊ฒ์ ๋จ๊ณ๋น ์์ 3๊ฐ๋ง ๋ณธ๋ฌธ ์ถ์ถ | ์ ๋ฌธ ์ถ์ถ ๊ฐ๋ฅ |\n"
            "| **๋ฐ๋ณต ์์ฑ** | 7B ๋ชจ๋ธ ํน์ฑ์ ์ถ๋ ฅ ๋ฐ๋ณต ๊ฐ๋ฅ โ ํ์ฒ๋ฆฌ๋ก ์ํ | |\n"
            "| **์ ๋ขฐ๋** | ์๊ธฐ ๋ณด๊ณ ๋ฐฉ์ (calibrated ์๋) | |\n\n"
            "### Production Enhancements\n\n"
            "์์ฉ ๋ฐฐํฌ์์ VELA๋ ๋ค์์ ์ถ๊ฐ ์ฐ๋ํ ์ ์์ต๋๋ค:\n"
            "- **FnGuide API**: ์ค์๊ฐ ์ปจ์ผ์์ค, ๋ชฉํ๊ฐ, ์ ๋๋ฆฌ์คํธ ํ์ (50๊ฐ+ ์ฆ๊ถ์ฌ)\n"
            "- **์ฆ๊ถ์ฌ ๋ฆฌํฌํธ**: ์ฃผ์ ์ฆ๊ถ์ฌ ๋ฆฌํฌํธ ์ ๋ฌธ ์ถ์ถ\n"
            "- **์ฌ๋ฌด์ ํ**: 3๊ฐ๋ + ๋์ฐจ๋์กฐํ, ํ๊ธํ๋ฆํ, ์์ต๊ณ์ฐ์\n\n"
            "์ํฐํ๋ผ์ด์ฆ ๋ฌธ์: hello@intrect.io\n\n"
            "---\n\n"
            "**VELA๋ ํฌ์ ์กฐ์ธ ๋๊ตฌ๊ฐ ์๋๋๋ค.** "
            "์ ๋ณด ์ ๊ณต/๊ต์ก ๋ชฉ์ ์ผ๋ก๋ง ์ฌ์ฉํ์ธ์. ํฌ์ ํ๋จ์ ์ ๋ฌธ๊ฐ์ ์๋ดํ์๊ธฐ ๋ฐ๋๋๋ค."
        )
    # Event bindings: both clicking the button and pressing Enter in the
    # textbox stream run_research output into the three result components.
    run_btn.click(
        fn=run_research,
        inputs=[query_input, max_iter_slider],
        outputs=[report_output, reasoning_output, json_output],
    )
    query_input.submit(
        fn=run_research,
        inputs=[query_input, max_iter_slider],
        outputs=[report_output, reasoning_output, json_output],
    )
if __name__ == "__main__":
    # Bind to all interfaces; 7860 is the port HF Spaces expects.
    demo.launch(server_name="0.0.0.0", server_port=7860)