code-slicer committed on
Commit 8662746 · verified · 1 Parent(s): aa0760c

Update app.py

Files changed (1)
  1. app.py +66 -12
app.py CHANGED
@@ -154,7 +154,7 @@ from chat_a import (
  # ──────────────────────────────── LLM ────────────────────────────────
  OLLAMA_HOST = os.getenv("OLLAMA_HOST", "http://localhost:11434")
  OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "gemma2:9b")
- OLLAMA_TIMEOUT = int(os.getenv("OLLAMA_TIMEOUT", "60"))
+ OLLAMA_TIMEOUT = int(os.getenv("OLLAMA_TIMEOUT", "300"))


  KOREAN_SYSTEM_PROMPT = """당신은 한국어 어시스턴트입니다. 항상 한국어로 답하세요."""
@@ -248,6 +248,50 @@ def _call_ollama_chat(messages, model=OLLAMA_MODEL, temperature=0.8, top_p=0.9,
          st.error(f"요청 오류: {e}")
          return ""

+
+ def call_ollama_stream(messages, *, model: str = OLLAMA_MODEL,
+                        temperature: float = 0.8, top_p: float = 0.9,
+                        top_k: int = 40, repeat_penalty: float = 1.1,
+                        num_predict: int = 200, num_ctx: int = 2048,
+                        system_prompt: str | None = None):
+     """
+     Streaming generator for Ollama /api/chat.
+     In Streamlit it can be passed straight to st.write_stream(...).
+     """
+     url = f"{OLLAMA_HOST}/api/chat"
+
+     _msgs = []
+     if system_prompt:
+         _msgs.append({"role": "system", "content": system_prompt})
+     _msgs.extend(messages)
+
+     payload = {
+         "model": model,
+         "messages": _msgs,
+         "options": {
+             "temperature": temperature,
+             "top_p": top_p,
+             "top_k": top_k,
+             "repeat_penalty": repeat_penalty,
+             "num_predict": num_predict,  # 128~256 recommended for CPU + a 9B model
+             "num_ctx": num_ctx           # 2048~4096
+         },
+         "stream": True,  # ✅ the key change
+     }
+
+     with requests.post(url, json=payload, stream=True, timeout=OLLAMA_TIMEOUT) as resp:
+         resp.raise_for_status()
+         for line in resp.iter_lines(decode_unicode=True):
+             if not line:
+                 continue
+             data = json.loads(line)
+             if data.get("done"):
+                 break
+             chunk = (data.get("message") or {}).get("content", "")
+             if chunk:
+                 yield chunk
+
+
  def _llm_structured_extract(user_text: str):
      out = _call_ollama_chat(
          [
@@ -287,7 +331,7 @@ def render_llm_followup(chat_container, inline=False):
      # exit commands
      if text in {"종료", "quit", "exit"}:
          st.session_state["llm_inline"] = False
-         st.session_state["llm_mode"] = False  # ← use llm_mode instead of llm_open
+         st.session_state["llm_mode"] = False
          st.rerun()
          return

@@ -295,18 +339,28 @@
      st.session_state.setdefault("llm_msgs", [])
      st.session_state["llm_msgs"].append({"role": "user", "content": text})

-     # ✅ make the actual call to Ollama
+     # ✅ switched to a streaming call
      try:
-         bot = _call_ollama_chat(
-             messages=st.session_state["llm_msgs"],
-             system_prompt=KOREAN_SYSTEM_PROMPT
-         )
-         if not bot:
-             bot = "⚠️ LLM 응답을 받지 못했습니다. Ollama 서버를 확인해 주세요."
-     except Exception:
-         bot = "⚠️ LLM 응답을 받지 못했습니다. Ollama 서버를 확인해 주세요."
+         with st.chat_message("assistant"):
+             # send the system prompt plus the full history
+             msgs = st.session_state["llm_msgs"]
+             full_text = st.write_stream(
+                 call_ollama_stream(
+                     msgs,
+                     model=OLLAMA_MODEL,
+                     system_prompt=KOREAN_SYSTEM_PROMPT,
+                     num_predict=200,  # adjust within 128~256 if needed
+                     num_ctx=2048
+                 )
+             )
+         st.session_state["llm_msgs"].append({"role": "assistant", "content": full_text})
+     except requests.Timeout:
+         st.error(f"⏱️ Ollama 타임아웃({OLLAMA_TIMEOUT}s). host={OLLAMA_HOST}, model={OLLAMA_MODEL}")
+         st.session_state["llm_msgs"].append({"role": "assistant", "content": "⚠️ 타임아웃이 발생했습니다."})
+     except requests.RequestException as e:
+         st.error(f"요청 오류: {e}")
+         st.session_state["llm_msgs"].append({"role": "assistant", "content": "⚠️ LLM 호출 중 오류가 발생했습니다."})

-     st.session_state["llm_msgs"].append({"role": "assistant", "content": bot})
      st.rerun()

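The new call_ollama_stream generator wraps Ollama's NDJSON streaming: with "stream": true, /api/chat returns one JSON object per line, each carrying a message.content fragment, followed by a final object with "done": true. A minimal self-contained sketch of the same protocol, useful as a command-line smoke test outside Streamlit (it assumes an Ollama server on localhost:11434 with gemma2:9b already pulled):

# Standalone smoke test for the streaming protocol used above.
# Assumptions: Ollama reachable at http://localhost:11434, gemma2:9b pulled.
import json
import requests

payload = {
    "model": "gemma2:9b",
    "messages": [
        {"role": "system", "content": "당신은 한국어 어시스턴트입니다. 항상 한국어로 답하세요."},
        {"role": "user", "content": "안녕하세요"},
    ],
    "options": {"num_predict": 64, "num_ctx": 2048},
    "stream": True,
}

with requests.post("http://localhost:11434/api/chat", json=payload,
                   stream=True, timeout=300) as resp:
    resp.raise_for_status()
    for raw in resp.iter_lines(decode_unicode=True):
        if not raw:
            continue
        data = json.loads(raw)   # one JSON object per line (NDJSON)
        if data.get("done"):     # final object carries eval stats, no new text
            break
        print(data["message"]["content"], end="", flush=True)
print()

In the app itself, st.write_stream consumes this generator, renders the chunks incrementally, and returns the concatenated string, which is what gets appended to llm_msgs.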
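One configuration note: the OLLAMA_TIMEOUT default rises from 60 to 300 seconds, which matters on CPU, where a 9B model can take minutes to emit its first token. Because the request now uses stream=True, requests applies that timeout to the connect and to each gap between chunk reads, not to the whole response. If a down server should fail fast while slow generation is still tolerated, the requests.post line inside call_ollama_stream could take a (connect, read) tuple instead; a sketch of that variant, not part of this commit:

# Hypothetical variant of the requests.post call in call_ollama_stream:
# 5 s budget (an assumed value) to establish the connection, and
# OLLAMA_TIMEOUT seconds allowed between streamed chunks after that.
with requests.post(url, json=payload, stream=True,
                   timeout=(5, OLLAMA_TIMEOUT)) as resp:
    ...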