Update app.py
app.py CHANGED
@@ -154,7 +154,7 @@ from chat_a import (
 # ──────────────────────────────── LLM ────────────────────────────────
 OLLAMA_HOST = os.getenv("OLLAMA_HOST", "http://localhost:11434")
 OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "gemma2:9b")
-OLLAMA_TIMEOUT = int(os.getenv("OLLAMA_TIMEOUT", "
+OLLAMA_TIMEOUT = int(os.getenv("OLLAMA_TIMEOUT", "300"))


 KOREAN_SYSTEM_PROMPT = """당신은 한국어 어시스턴트입니다. 항상 한국어로 답하세요."""  # "You are a Korean-language assistant. Always answer in Korean."

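Side note: all three settings are read once via os.getenv at import time, so any override has to be in the environment before app.py is imported, and a non-integer OLLAMA_TIMEOUT would raise ValueError right there. A minimal sketch of launcher-side overrides (the values shown just mirror the defaults above, purely illustrative):

import os

# Hypothetical pre-import overrides; must run before app.py is imported.
os.environ.setdefault("OLLAMA_HOST", "http://localhost:11434")
os.environ.setdefault("OLLAMA_MODEL", "gemma2:9b")
os.environ.setdefault("OLLAMA_TIMEOUT", "300")  # seconds; must parse as int()

import app  # noqa: E402  (imported only after the environment is set)
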
@@ -248,6 +248,50 @@ def _call_ollama_chat(messages, model=OLLAMA_MODEL, temperature=0.8, top_p=0.9,
         st.error(f"요청 오류: {e}")  # "Request error: ..."
         return ""

+
+def call_ollama_stream(messages, *, model: str = OLLAMA_MODEL,
+                       temperature: float = 0.8, top_p: float = 0.9,
+                       top_k: int = 40, repeat_penalty: float = 1.1,
+                       num_predict: int = 200, num_ctx: int = 2048,
+                       system_prompt: str | None = None):
+    """
+    Streaming generator for the Ollama /api/chat endpoint.
+    In Streamlit it can be consumed directly with st.write_stream(...).
+    """
+    url = f"{OLLAMA_HOST}/api/chat"
+
+    _msgs = []
+    if system_prompt:
+        _msgs.append({"role": "system", "content": system_prompt})
+    _msgs.extend(messages)
+
+    payload = {
+        "model": model,
+        "messages": _msgs,
+        "options": {
+            "temperature": temperature,
+            "top_p": top_p,
+            "top_k": top_k,
+            "repeat_penalty": repeat_penalty,
+            "num_predict": num_predict,  # 128~256 recommended for CPU + a 9B model
+            "num_ctx": num_ctx           # 2048~4096
+        },
+        "stream": True,  # ✅ the key change
+    }
+
+    with requests.post(url, json=payload, stream=True, timeout=OLLAMA_TIMEOUT) as resp:
+        resp.raise_for_status()
+        for line in resp.iter_lines(decode_unicode=True):
+            if not line:
+                continue
+            data = json.loads(line)
+            if data.get("done"):
+                break
+            chunk = (data.get("message") or {}).get("content", "")
+            if chunk:
+                yield chunk
+
+
 def _llm_structured_extract(user_text: str):
     out = _call_ollama_chat(
         [

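For quick verification outside Streamlit, the new generator can be driven directly; a minimal sketch, assuming an Ollama server is reachable at OLLAMA_HOST with OLLAMA_MODEL pulled (the __main__ guard and the sample prompt are illustrative, not part of this commit):

# Hypothetical smoke test for call_ollama_stream -- not part of the commit.
# Relies on app.py's existing imports (os, json, requests) and constants.
if __name__ == "__main__":
    demo = [{"role": "user", "content": "안녕하세요"}]  # "Hello"
    for chunk in call_ollama_stream(demo, system_prompt=KOREAN_SYSTEM_PROMPT):
        print(chunk, end="", flush=True)
    print()
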
@@ -287,7 +331,7 @@ def render_llm_followup(chat_container, inline=False):
     # exit command ("종료" = "quit")
     if text in {"종료", "quit", "exit"}:
         st.session_state["llm_inline"] = False
-        st.session_state["llm_mode"] = False
+        st.session_state["llm_mode"] = False
         st.rerun()
         return

@@ -295,18 +339,28 @@ def render_llm_followup(chat_container, inline=False):
     st.session_state.setdefault("llm_msgs", [])
     st.session_state["llm_msgs"].append({"role": "user", "content": text})

-    # ✅
+    # ✅ switched to the streaming call
     try:
-
-
-
-
-
-
-
-
+        with st.chat_message("assistant"):
+            # send the system prompt plus the full history
+            msgs = st.session_state["llm_msgs"]
+            full_text = st.write_stream(
+                call_ollama_stream(
+                    msgs,
+                    model=OLLAMA_MODEL,
+                    system_prompt=KOREAN_SYSTEM_PROMPT,
+                    num_predict=200,   # adjust within 128~256 as needed
+                    num_ctx=2048
+                )
+            )
+        st.session_state["llm_msgs"].append({"role": "assistant", "content": full_text})
+    except requests.Timeout:
+        st.error(f"⏱️ Ollama 타임아웃({OLLAMA_TIMEOUT}s). host={OLLAMA_HOST}, model={OLLAMA_MODEL}")  # "Ollama timed out"
+        st.session_state["llm_msgs"].append({"role": "assistant", "content": "⚠️ 타임아웃이 발생했습니다."})  # "A timeout occurred."
+    except requests.RequestException as e:
+        st.error(f"요청 오류: {e}")  # "Request error: ..."
+        st.session_state["llm_msgs"].append({"role": "assistant", "content": "⚠️ LLM 호출 중 오류가 발생했습니다."})  # "An error occurred while calling the LLM."

-    st.session_state["llm_msgs"].append({"role": "assistant", "content": bot})
     st.rerun()

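Design note: st.write_stream consumes the generator, renders the chunks incrementally, and returns the concatenated text, which is why full_text can be appended to llm_msgs in a single step. A self-contained sketch of that contract, with a toy generator standing in for call_ollama_stream (all names here are illustrative):

import time
import streamlit as st

def demo_stream():
    # Toy generator in place of call_ollama_stream.
    for word in ["streamed ", "response ", "example"]:
        yield word
        time.sleep(0.1)

with st.chat_message("assistant"):
    full = st.write_stream(demo_stream())  # returns the full concatenated string
st.caption(f"{len(full)} characters would be stored in the history")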