# vela-demo / app.py
# Author: Heewon Oh
# feat(app): ZeroGPU ์ฟผํ„ฐ ์†Œ์ง„ ์‹œ RunPod Serverless fallback ์ถ”๊ฐ€
# commit e73a081
"""VELA Research Agent - Gradio Web Demo
HuggingFace Spaces ๋ฐฐํฌ์šฉ Gradio ๋ฐ๋ชจ.
ZeroGPU ๋ฐฑ์—”๋“œ๋กœ VELA 7B ๋ชจ๋ธ์„ ์‹คํ–‰ํ•ฉ๋‹ˆ๋‹ค (HF Pro ํ•„์š”).
HuggingFace Spaces ๋ฐฐํฌ ์‹œ:
1. Spaces ์„ค์ •์—์„œ SDK๋ฅผ "gradio", Hardware๋ฅผ "ZeroGPU"๋กœ ์„ ํƒ
2. (์„ ํƒ) Secrets์— ๊ฒ€์ƒ‰ API ํ‚ค ์ถ”๊ฐ€:
- NAVER_CLIENT_ID_1, NAVER_CLIENT_SECRET_1
3. GPU๋Š” @spaces.GPU ๋ฐ์ฝ”๋ ˆ์ดํ„ฐ๋กœ ์ž๋™ ํ• ๋‹น
"""
import json
import logging
import os
import time
import traceback
import gradio as gr
from dotenv import load_dotenv
load_dotenv()
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
)
logger = logging.getLogger(__name__)
def get_backend() -> str:
    """Select the LLM backend from the environment.

    Priority: an explicit ``VELA_LLM_BACKEND`` override, then HF Spaces
    (``SPACE_ID`` present => "zerogpu"), then RunPod credentials
    (``RUNPOD_API_KEY`` => "runpod"); defaults to "zerogpu".
    """
    if override := os.environ.get("VELA_LLM_BACKEND"):
        return override
    # Auto-detect the hosting environment; first match wins.
    for env_var, backend in (("SPACE_ID", "zerogpu"), ("RUNPOD_API_KEY", "runpod")):
        if os.environ.get(env_var):
            return backend
    return "zerogpu"
def _is_zerogpu_quota_error(e: Exception) -> bool:
    """Heuristic check: does this exception look like a ZeroGPU quota/allocation failure?"""
    keywords = (
        "quota", "zerogpu", "out of gpu", "no gpu", "gpu quota",
        "exceeded", "gpu not available", "not enough gpu",
    )
    text = str(e).lower()
    for keyword in keywords:
        if keyword in text:
            return True
    return False
def _runpod_available() -> bool:
    """Return True when both env vars required for the RunPod Serverless fallback are set."""
    required = ("RUNPOD_API_KEY", "RUNPOD_ENDPOINT_ID")
    return all(os.environ.get(name) for name in required)
BACKEND = get_backend()
logger.info(f"LLM ๋ฐฑ์—”๋“œ: {BACKEND}")

# ZeroGPU: wrap the ENTIRE research run in a single @spaces.GPU(duration=300).
# Decorating each _generate() with @spaces.GPU would make the second GPU
# allocation within the same request fail.
_has_spaces = False
if BACKEND == "zerogpu":
    import vela.tools.zerogpu_client  # noqa: F401 — preload the model
    try:
        import spaces
        _has_spaces = True
    except ImportError:
        # `spaces` is only available on HF Spaces hardware; fall back to the
        # undecorated wrapper below.
        pass

if _has_spaces:
    @spaces.GPU(duration=300)
    def _run_research_gpu(query: str, max_iterations: int):
        """Run the whole research loop inside one GPU context (single GPU allocation).

        ZeroGPU pickles arguments via multiprocessing, so non-picklable
        objects (ResearchAgent, callbacks, ...) must be created inside this
        function. Only basic types (str, int, ...) are allowed as arguments.
        """
        from vela import ResearchAgent
        from vela.schemas import ResearchOptions
        agent = ResearchAgent(llm_backend="zerogpu")
        options = ResearchOptions(max_iterations=max_iterations, extract_content=True)
        return agent.research(query=query, options=options)
else:
    def _run_research_gpu(query: str, max_iterations: int):
        # Non-ZeroGPU backends: same call, no GPU decorator needed.
        from vela import ResearchAgent
        from vela.schemas import ResearchOptions
        agent = ResearchAgent(llm_backend=BACKEND)
        options = ResearchOptions(max_iterations=max_iterations, extract_content=True)
        return agent.research(query=query, options=options)
def _run_research_runpod(query: str, max_iterations: int):
    """Run the research loop on RunPod Serverless (no GPU decorator required)."""
    from vela import ResearchAgent
    from vela.schemas import ResearchOptions

    runpod_agent = ResearchAgent(llm_backend="runpod")
    opts = ResearchOptions(max_iterations=max_iterations, extract_content=True)
    return runpod_agent.research(query=query, options=opts)
def run_research(query: str, max_iterations: int):
    """Run a research query and stream results to the Gradio UI.

    Yields ``(report_markdown, reasoning_markdown, raw_json)`` tuples.

    ZeroGPU: the whole research loop runs inside a single
    @spaces.GPU(duration=300) context (``_run_research_gpu``). Calling
    @spaces.GPU more than once within the same Gradio request fails on the
    second allocation, so exactly one GPU-decorated call performs all LLM
    inference. On a ZeroGPU quota error the run falls back to RunPod
    Serverless when the RunPod environment variables are configured.
    """
    if not query or not query.strip():
        yield "์ฟผ๋ฆฌ๋ฅผ ์ž…๋ ฅํ•ด์ฃผ์„ธ์š”.", "", ""
        return
    # Normalize the inputs once instead of re-stripping/re-casting per use.
    q = query.strip()
    iterations = int(max_iterations)
    try:
        # First yield: initialize the progress view so the UI reacts immediately.
        progress_lines = [f"## ๋ฆฌ์„œ์น˜ ์ง„ํ–‰ ์ค‘: {q}\n"]
        yield "\n".join(progress_lines), "", ""

        # Run the entire research inside a single GPU context.
        # ZeroGPU pickle constraint: agent/callbacks are created inside
        # _run_research_gpu, so only basic types cross the boundary.
        result = None
        try:
            result = _run_research_gpu(q, iterations)
        except Exception as gpu_err:
            if _is_zerogpu_quota_error(gpu_err) and _runpod_available():
                logger.warning(f"ZeroGPU ์ฟผํ„ฐ ์†Œ์ง„, RunPod Serverless๋กœ ์ „ํ™˜: {gpu_err}")
                yield (
                    f"## ๋ฆฌ์„œ์น˜ ์ง„ํ–‰ ์ค‘: {q}\n\n"
                    f"> โš ๏ธ ZeroGPU ์ฟผํ„ฐ ์ดˆ๊ณผ โ€” RunPod Serverless๋กœ ์ „ํ™˜ํ•ฉ๋‹ˆ๋‹ค...\n",
                    "",
                    "",
                )
                result = _run_research_runpod(q, iterations)
            else:
                raise

        if not result:
            yield "๋ฆฌ์„œ์น˜ ๊ฒฐ๊ณผ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.", "", ""
            return

        # 1) Markdown report
        markdown_report = result.to_markdown()

        # 2) Reasoning trace
        reasoning_lines = []
        for s in result.reasoning_trace:
            reasoning_lines.append(f"### Step {s.step_number}")
            reasoning_lines.append(f"**Thought**: {s.thought}")
            reasoning_lines.append(f"**Action**: {s.action}")
            if s.query:
                reasoning_lines.append(f"**Query**: `{s.query}`")
            reasoning_lines.append(f"**Observation**: {s.observation}")
            reasoning_lines.append(f"**Confidence**: {s.confidence:.0%}")
            reasoning_lines.append("")
        reasoning_md = "\n".join(reasoning_lines) if reasoning_lines else "์ถ”๋ก  ๊ณผ์ • ์—†์Œ"

        # 3) Raw JSON
        raw_json = json.dumps(result.to_dict(), ensure_ascii=False, indent=2)

        yield markdown_report, reasoning_md, raw_json
    except Exception as e:
        # logger.exception keeps the full traceback in the server logs
        # (the UI already shows it via traceback.format_exc() below).
        logger.exception(f"๋ฆฌ์„œ์น˜ ์‹คํŒจ: {e}")
        error_md = (
            f"## ์˜ค๋ฅ˜ ๋ฐœ์ƒ\n\n"
            f"```\n{type(e).__name__}: {e}\n```\n\n"
            f"<details><summary>Traceback</summary>\n\n"
            f"```\n{traceback.format_exc()}\n```\n\n"
            f"</details>"
        )
        yield error_md, "", ""
# ============================================================================
# Gradio UI
# ============================================================================

# Sample queries shown under the input box: [query, max_iterations].
EXAMPLES = [
    ["SKํ•˜์ด๋‹‰์Šค HBM ์‹œ์žฅ ์ „๋ง", 3],
    ["์‚ผ์„ฑ์ „์ž ํŒŒ์šด๋“œ๋ฆฌ ๊ฒฝ์Ÿ๋ ฅ ๋ถ„์„", 3],
    ["๋„ค์ด๋ฒ„ AI ์‚ฌ์—… ์ „๋žต", 3],
    ["ํ˜„๋Œ€์ฐจ ์ „๊ธฐ์ฐจ ์‹œ์žฅ ์ ์œ ์œจ", 3],
]
# Build the Gradio Blocks app: query input + iteration slider on top,
# streamed report / reasoning trace / raw JSON below.
with gr.Blocks(title="VELA Research Agent", theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        "# VELA Research Agent Demo\n"
        "*Korean Financial Research with 7B LLM*\n\n"
        "VELA๋Š” ํ•œ๊ตญ ์ฃผ์‹์‹œ์žฅ ์ „๋ฌธ ๋ฆฌ์„œ์น˜ ์—์ด์ „ํŠธ์ž…๋‹ˆ๋‹ค. "
        "Chain-of-Thought ์ถ”๋ก ์œผ๋กœ ์›น ๊ฒ€์ƒ‰, ๋ถ„์„, ๊ฒฐ๋ก  ๋„์ถœ์„ ์ž๋™ ์ˆ˜ํ–‰ํ•ฉ๋‹ˆ๋‹ค."
    )
    with gr.Row():
        with gr.Column(scale=3):
            query_input = gr.Textbox(
                label="๋ฆฌ์„œ์น˜ ์ฟผ๋ฆฌ",
                placeholder="์˜ˆ: SKํ•˜์ด๋‹‰์Šค HBM ์‹œ์žฅ ์ „๋ง",
                lines=1,
            )
        with gr.Column(scale=1):
            max_iter_slider = gr.Slider(
                minimum=1, maximum=5, value=3, step=1,
                label="์ตœ๋Œ€ ๋ฐ˜๋ณต",
            )
    run_btn = gr.Button("๋ฆฌ์„œ์น˜ ์‹คํ–‰", variant="primary", size="lg")

    # Result area
    report_output = gr.Markdown(label="๋ฆฌ์„œ์น˜ ๊ฒฐ๊ณผ")
    with gr.Accordion("์ถ”๋ก  ๊ณผ์ • (Reasoning Trace)", open=False):
        reasoning_output = gr.Markdown()
    with gr.Accordion("Raw JSON", open=False):
        json_output = gr.Code(language="json")

    # Example queries
    gr.Examples(
        examples=EXAMPLES,
        inputs=[query_input, max_iter_slider],
        label="์˜ˆ์ œ ์ฟผ๋ฆฌ",
    )

    # Limitations / disclaimer panel (static markdown)
    with gr.Accordion("Limitations", open=False):
        gr.Markdown(
            "### Known Limitations\n\n"
            "*์ด ๋ฐ๋ชจ๋Š” ๊ณต๊ฐœ ๊ฒ€์ƒ‰ API + ๋„ค์ด๋ฒ„ ์ฆ๊ถŒ ๋ฐ์ดํ„ฐ๋ฅผ ์‚ฌ์šฉํ•ฉ๋‹ˆ๋‹ค.*\n\n"
            "| ํ•ญ๋ชฉ | ์„ค๋ช… | ์ƒ์šฉ ๋ฐฐํฌ |\n"
            "|------|------|----------|\n"
            "| **๋ชจ๋ธ ํฌ๊ธฐ** | 7B ํŒŒ๋ผ๋ฏธํ„ฐ โ€” ๋ณต์žกํ•œ ๋‹ค๋‹จ๊ณ„ ์ถ”๋ก ์€ ๋Œ€ํ˜• ๋ชจ๋ธ ๋Œ€๋น„ ํ’ˆ์งˆ ์ €ํ•˜ ๊ฐ€๋Šฅ | |\n"
            "| **์–ธ์–ด** | ํ•œ๊ตญ ๊ธˆ์œต ๋„๋ฉ”์ธ ์ „์šฉ โ€” ์˜์–ด/๋‹ค๊ตญ์–ด ์ฟผ๋ฆฌ๋Š” ํ’ˆ์งˆ ์ €ํ•˜ | |\n"
            "| **์‹œ์„ธ/๋ฐธ๋ฅ˜์—์ด์…˜** | ๋„ค์ด๋ฒ„ ์ฆ๊ถŒ ์‹ค์‹œ๊ฐ„ ์—ฐ๋™ (PER/PBR/EPS/์ˆ˜๊ธ‰) | FnGuide ์ถ”๊ฐ€ ๊ฐ€๋Šฅ |\n"
            "| **๊ฒ€์ƒ‰ ๋ฒ”์œ„** | Naver + DuckDuckGo โ€” ์œ ๋ฃŒ DB ์ ‘๊ทผ ๋ถˆ๊ฐ€ | ์ฆ๊ถŒ์‚ฌ ๋ฆฌํฌํŠธ ์—ฐ๋™ |\n"
            "| **์ฝ˜ํ…์ธ  ์ถ”์ถœ** | ๊ฒ€์ƒ‰ ๋‹จ๊ณ„๋‹น ์ƒ์œ„ 3๊ฐœ๋งŒ ๋ณธ๋ฌธ ์ถ”์ถœ | ์ „๋ฌธ ์ถ”์ถœ ๊ฐ€๋Šฅ |\n"
            "| **๋ฐ˜๋ณต ์ƒ์„ฑ** | 7B ๋ชจ๋ธ ํŠน์„ฑ์ƒ ์ถœ๋ ฅ ๋ฐ˜๋ณต ๊ฐ€๋Šฅ โ€” ํ›„์ฒ˜๋ฆฌ๋กœ ์™„ํ™” | |\n"
            "| **์‹ ๋ขฐ๋„** | ์ž๊ธฐ ๋ณด๊ณ  ๋ฐฉ์‹ (calibrated ์•„๋‹˜) | |\n\n"
            "### Production Enhancements\n\n"
            "์ƒ์šฉ ๋ฐฐํฌ์—์„œ VELA๋Š” ๋‹ค์Œ์„ ์ถ”๊ฐ€ ์—ฐ๋™ํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค:\n"
            "- **FnGuide API**: ์‹ค์‹œ๊ฐ„ ์ปจ์„ผ์„œ์Šค, ๋ชฉํ‘œ๊ฐ€, ์• ๋„๋ฆฌ์ŠคํŠธ ํ‰์  (50๊ฐœ+ ์ฆ๊ถŒ์‚ฌ)\n"
            "- **์ฆ๊ถŒ์‚ฌ ๋ฆฌํฌํŠธ**: ์ฃผ์š” ์ฆ๊ถŒ์‚ฌ ๋ฆฌํฌํŠธ ์ „๋ฌธ ์ถ”์ถœ\n"
            "- **์žฌ๋ฌด์ œํ‘œ**: 3๊ฐœ๋…„+ ๋Œ€์ฐจ๋Œ€์กฐํ‘œ, ํ˜„๊ธˆํ๋ฆ„ํ‘œ, ์†์ต๊ณ„์‚ฐ์„œ\n\n"
            "์—”ํ„ฐํ”„๋ผ์ด์ฆˆ ๋ฌธ์˜: hello@intrect.io\n\n"
            "---\n\n"
            "**VELA๋Š” ํˆฌ์ž ์กฐ์–ธ ๋„๊ตฌ๊ฐ€ ์•„๋‹™๋‹ˆ๋‹ค.** "
            "์ •๋ณด ์ œ๊ณต/๊ต์œก ๋ชฉ์ ์œผ๋กœ๋งŒ ์‚ฌ์šฉํ•˜์„ธ์š”. ํˆฌ์ž ํŒ๋‹จ์€ ์ „๋ฌธ๊ฐ€์™€ ์ƒ๋‹ดํ•˜์‹œ๊ธฐ ๋ฐ”๋ž๋‹ˆ๋‹ค."
        )

    # Event bindings: both the button click and Enter in the textbox stream
    # run_research output into the three result components.
    run_btn.click(
        fn=run_research,
        inputs=[query_input, max_iter_slider],
        outputs=[report_output, reasoning_output, json_output],
    )
    query_input.submit(
        fn=run_research,
        inputs=[query_input, max_iter_slider],
        outputs=[report_output, reasoning_output, json_output],
    )
if __name__ == "__main__":
    # Bind to all interfaces on 7860, the standard HF Spaces port.
    demo.launch(server_name="0.0.0.0", server_port=7860)