Spaces:
Sleeping
Sleeping
Use only the current user message (no conversational memory)
Browse files
app.py
CHANGED
|
@@ -7,6 +7,7 @@ from ollama import Client as OllamaClient
|
|
| 7 |
LLM_MODEL = "llama3.2:3b" # or "mistral:7b", "qwen2.5:3b"
|
| 8 |
WHISPER_SIZE = "small" # "base", "small", "medium"
|
| 9 |
USE_SILERO = True # set False to use Coqui XTTS v2
|
|
|
|
| 10 |
|
| 11 |
import os
|
| 12 |
USE_REMOTE_OLLAMA = bool(os.getenv("OLLAMA_HOST"))
|
|
@@ -43,24 +44,18 @@ Reply: <your friendly response to keep the conversation going>"""
|
|
| 43 |
|
| 44 |
def chat_with_llm(history_messages, user_text):
|
| 45 |
if USE_REMOTE_OLLAMA:
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
messages.append({"role": "user", "content": user_text})
|
| 52 |
resp = ollama.chat(model=LLM_MODEL, messages=messages)
|
| 53 |
return resp["message"]["content"]
|
| 54 |
else:
|
| 55 |
-
#
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
)
|
| 60 |
-
prompt = f"{SYSTEM_PROMPT}\n{history_text}\nUser: {user_text}\nAssistant:"
|
| 61 |
-
out = gen(prompt)[0]["generated_text"]
|
| 62 |
-
# Return only the new assistant chunk after the prompt
|
| 63 |
-
return out.split("Assistant:", 1)[-1].strip()
|
| 64 |
|
| 65 |
|
| 66 |
|
|
|
|
# --- Model / feature configuration -------------------------------------------
import os

LLM_MODEL = "llama3.2:3b"   # or "mistral:7b", "qwen2.5:3b"
WHISPER_SIZE = "small"      # "base", "small", "medium"
USE_SILERO = True           # set False to use Coqui XTTS v2
USE_CONTEXT = False         # new: disable conversational memory

# Talk to a remote Ollama server when OLLAMA_HOST is set in the environment;
# otherwise the local text-generation fallback is used.
USE_REMOTE_OLLAMA = bool(os.getenv("OLLAMA_HOST"))
|
|
|
|
| 44 |
|
def chat_with_llm(history_messages, user_text):
    """Return the assistant's reply to *user_text*.

    history_messages: prior conversation turns, included in the request only
        when USE_CONTEXT is True; with the module default (False) they are
        ignored, preserving this revision's "only the current user message"
        behaviour exactly.
        NOTE(review): assumed to be a list of {"role": ..., "content": ...}
        dicts as used by the Ollama branch — confirm against the caller.
    user_text: the current user utterance.

    Uses a remote Ollama server when USE_REMOTE_OLLAMA is set, otherwise the
    local transformers text-generation pipeline bound to ``gen``.
    """
    if USE_REMOTE_OLLAMA:
        # System prompt first, optional history next, current user last.
        messages = [{"role": "system", "content": SYSTEM_PROMPT}]
        if USE_CONTEXT and history_messages:
            messages.extend(history_messages)
        messages.append({"role": "user", "content": user_text})
        resp = ollama.chat(model=LLM_MODEL, messages=messages)
        return resp["message"]["content"]
    else:
        # Flatten the optional history into "Role: text" lines for the plain
        # text prompt. With USE_CONTEXT False this block is a no-op and the
        # prompt is identical to the system + current-user form.
        history_lines = []
        if USE_CONTEXT and history_messages:
            history_lines = [
                f"{m['role'].capitalize()}: {m['content']}"
                for m in history_messages
            ]
        history_block = ("\n".join(history_lines) + "\n") if history_lines else ""
        prompt = f"{SYSTEM_PROMPT}\n{history_block}User: {user_text}\nAssistant:"
        # return_full_text=False makes the pipeline return only the newly
        # generated continuation, so no "Assistant:" splitting is needed.
        out = gen(
            prompt,
            return_full_text=False,
            max_new_tokens=80,
            temperature=0.7,
            repetition_penalty=1.1,
        )[0]["generated_text"].strip()
        return out
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
|
| 60 |
|
| 61 |
|