Spaces:
Runtime error
Update app.py
app.py CHANGED
@@ -2,24 +2,23 @@ import os, time, json
 import gradio as gr
 from llama_cpp import Llama
 
-MODEL_REPO = …
-MODEL_FILE = …
+# ✅ Working public GGUF model
+MODEL_REPO = "TheBloke/Phi-3.5-mini-instruct-GGUF"
+MODEL_FILE = "phi-3.5-mini-instruct-q4_K_M.gguf"
 SAVE_PATH = "convos.jsonl"
 
-# Lazy init
 llm = None
 
 def get_llm():
     global llm
     if llm is not None:
         return llm
-    # Auto-download GGUF from HF hub on first run
     llm = Llama.from_pretrained(
         repo_id=MODEL_REPO,
         filename=MODEL_FILE,
         n_ctx=4096,
-        n_threads=4,
-        n_gpu_layers=0,
+        n_threads=4,
+        n_gpu_layers=0,
         verbose=False
     )
     return llm
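A note on the new model constants: the Space status above is still "Runtime error", and with `Llama.from_pretrained` the usual cause is a `repo_id` or `filename` that does not resolve on the Hub. The sketch below (an illustration, not part of the commit) pre-flights the download with `huggingface_hub`, the same library `from_pretrained` uses internally:

# Pre-flight sketch: resolve the GGUF before constructing Llama.
# MODEL_REPO / MODEL_FILE are the constants added above.
from huggingface_hub import hf_hub_download

try:
    gguf_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE)
    print(f"GGUF cached at: {gguf_path}")
except Exception as exc:
    # A missing repo or filename surfaces here, before the first chat
    # request can crash the running app.
    print(f"Could not fetch {MODEL_REPO}/{MODEL_FILE}: {exc}")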
@@ -29,12 +28,11 @@ def format_messages(system, history, user_msg):
     if system.strip():
         msgs.append({"role": "system", "content": system})
     for h in history:
-        role = "user" if h[0] is not None else "assistant"
         if h[0] is not None:
-            msgs.append({"role":"user","content":h[0]})
+            msgs.append({"role": "user", "content": h[0]})
         if h[1] is not None:
-            msgs.append({"role":"assistant","content":h[1]})
-    msgs.append({"role":"user","content":user_msg})
+            msgs.append({"role": "assistant", "content": h[1]})
+    msgs.append({"role": "user", "content": user_msg})
     return msgs
 
 def save_turn(system, history, user_msg, assistant_msg):
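For context, `history` in `format_messages` is Gradio's tuple-style chat history: a list of `(user_message, assistant_message)` pairs in which either side may be `None`. A quick illustration of what the patched function returns, with invented sample strings:

history = [("Hi", "Hello! How can I help?"), ("What is GGUF?", None)]
msgs = format_messages("You are concise.", history, "And llama.cpp?")
# msgs == [
#     {"role": "system", "content": "You are concise."},
#     {"role": "user", "content": "Hi"},
#     {"role": "assistant", "content": "Hello! How can I help?"},
#     {"role": "user", "content": "What is GGUF?"},
#     {"role": "user", "content": "And llama.cpp?"},
# ]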
@@ -52,7 +50,6 @@ def chat_fn(user_msg, history, system, temperature, top_p, max_new_tokens):
     llm = get_llm()
     msgs = format_messages(system, history, user_msg)
 
-    # Stream tokens
     stream = llm.create_chat_completion(
         messages=msgs,
         temperature=temperature,
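This hunk ends mid-call. In llama-cpp-python a streaming chat completion typically continues with the remaining sampling parameters plus `stream=True`, followed by an OpenAI-style delta loop. A sketch, assuming the remaining arguments follow `chat_fn`'s parameter names (`top_p`, `max_new_tokens`):

stream = llm.create_chat_completion(
    messages=msgs,
    temperature=temperature,
    top_p=top_p,
    max_tokens=max_new_tokens,
    stream=True,
)
partial = ""
for chunk in stream:
    delta = chunk["choices"][0]["delta"]
    if "content" in delta:
        partial += delta["content"]
        yield partial  # assumes chat_fn is a generator that Gradio streams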
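`save_turn`'s body falls outside the diff. Given `SAVE_PATH = "convos.jsonl"` and the `import os, time, json` context in the first hunk header, a plausible JSONL append would look like this (an assumption, not the Space's actual code):

def save_turn(system, history, user_msg, assistant_msg):
    # Hypothetical body: append one conversation turn per JSON line.
    record = {
        "time": time.time(),
        "system": system,
        "history": history,
        "user": user_msg,
        "assistant": assistant_msg,
    }
    with open(SAVE_PATH, "a", encoding="utf-8") as f:
        f.write(json.dumps(record, ensure_ascii=False) + "\n")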