bdstar committed on
Commit
4d66d17
·
verified ·
1 Parent(s): 0f86bd3

only the current user message

Browse files
Files changed (1) hide show
  1. app.py +10 -15
app.py CHANGED
@@ -7,6 +7,7 @@ from ollama import Client as OllamaClient
7
  LLM_MODEL = "llama3.2:3b" # or "mistral:7b", "qwen2.5:3b"
8
  WHISPER_SIZE = "small" # "base", "small", "medium"
9
  USE_SILERO = True # set False to use Coqui XTTS v2
 
10
 
11
  import os
12
  USE_REMOTE_OLLAMA = bool(os.getenv("OLLAMA_HOST"))
@@ -43,24 +44,18 @@ Reply: <your friendly response to keep the conversation going>"""
43
 
44
  def chat_with_llm(history_messages, user_text):
45
  if USE_REMOTE_OLLAMA:
46
- messages = [{"role": "system", "content": SYSTEM_PROMPT}]
47
-
48
- for m in (history_messages or []):
49
- if m.get("role") in ("user", "assistant") and m.get("content"):
50
- messages.append({"role": m["role"], "content": m["content"]})
51
- messages.append({"role": "user", "content": user_text})
52
  resp = ollama.chat(model=LLM_MODEL, messages=messages)
53
  return resp["message"]["content"]
54
  else:
55
- # Simple prompt stitching for the fallback pipeline
56
- history_text = "\n".join(
57
- [f"User: {m['content']}" if m["role"]=="user" else f"Assistant: {m['content']}"
58
- for m in (history_messages or [])]
59
- )
60
- prompt = f"{SYSTEM_PROMPT}\n{history_text}\nUser: {user_text}\nAssistant:"
61
- out = gen(prompt)[0]["generated_text"]
62
- # Return only the new assistant chunk after the prompt
63
- return out.split("Assistant:", 1)[-1].strip()
64
 
65
 
66
 
 
7
  LLM_MODEL = "llama3.2:3b" # or "mistral:7b", "qwen2.5:3b"
8
  WHISPER_SIZE = "small" # "base", "small", "medium"
9
  USE_SILERO = True # set False to use Coqui XTTS v2
10
+ USE_CONTEXT = False # <— new: disable conversational memory
11
 
12
  import os
13
  USE_REMOTE_OLLAMA = bool(os.getenv("OLLAMA_HOST"))
 
44
 
45
  def chat_with_llm(history_messages, user_text):
46
  if USE_REMOTE_OLLAMA:
47
+ # Only system + current user
48
+ messages = [
49
+ {"role": "system", "content": SYSTEM_PROMPT},
50
+ {"role": "user", "content": user_text},
51
+ ]
 
52
  resp = ollama.chat(model=LLM_MODEL, messages=messages)
53
  return resp["message"]["content"]
54
  else:
55
+ # Only system + current user
56
+ prompt = f"{SYSTEM_PROMPT}\nUser: {user_text}\nAssistant:"
57
+ out = gen(prompt, return_full_text=False, max_new_tokens=80, temperature=0.7, repetition_penalty=1.1)[0]["generated_text"].strip()
58
+ return out
 
 
 
 
 
59
 
60
 
61