Update app.py
app.py CHANGED
@@ -9,6 +9,7 @@ from llama_cpp import Llama
 
 app = FastAPI()
 
+# --- CORS Permissions ---
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
@@ -18,7 +19,7 @@ app.add_middleware(
 )
 
 # --- Configuration ---
-# Map
+# Map filenames to "Hannah" names
 MODEL_MAP = {
     "qwen2.5-0.5b-instruct-q2_k.gguf": "Hannah-1.0 Light",
     "qwen2.5-0.5b-instruct-q4_k_m.gguf": "Hannah-1.0 Heavy"
@@ -39,12 +40,12 @@ def get_model(model_name):
     print(f"Loading {model_name}...")
     if current_model is not None: del current_model
 
-    #
+    # --- PERFORMANCE TUNING ---
     current_model = Llama(
         model_path=model_name,
-        n_ctx=4096,
-        n_threads=2,
-        n_batch=
+        n_ctx=4096,    # Large memory for conversation history
+        n_threads=2,   # MAX for Hugging Face Free Tier (Crucial for speed)
+        n_batch=512,   # Process tokens in chunks
         verbose=False
     )
     current_model_name = model_name
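
The hunk above shows only part of get_model. For context, a minimal runnable sketch of the load-once, swap-on-demand pattern it tunes — assuming the module-level current_model / current_model_name globals that the surrounding code implies (the global statement and early-return reuse are not visible in the diff):

    # Sketch only: reconstructs the caching pattern around the tuned Llama() call.
    from llama_cpp import Llama

    current_model = None
    current_model_name = None

    def get_model(model_name):
        global current_model, current_model_name
        if current_model_name == model_name:
            return current_model      # reuse the already-loaded model
        print(f"Loading {model_name}...")
        if current_model is not None:
            del current_model         # drop old weights before loading new ones
        current_model = Llama(
            model_path=model_name,
            n_ctx=4096,    # context window: more history, more RAM
            n_threads=2,   # match the CPU cores available on the host
            n_batch=512,   # prompt-evaluation batch size
            verbose=False,
        )
        current_model_name = model_name
        return current_model

Keeping a single model resident and deleting it before a swap is what lets two quantizations share the small free-tier RAM budget.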
@@ -53,9 +54,9 @@ def get_model(model_name):
 @app.get("/api/models")
 async def list_models():
     models_info = []
-    #
+    # Scan for .gguf files
     for f in glob.glob("*.gguf"):
-        display_name = MODEL_MAP.get(f, f)
+        display_name = MODEL_MAP.get(f, f)
         size_mb = os.path.getsize(f) / (1024 * 1024)
         models_info.append({
             "filename": f,
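
For reference, a hypothetical client query against this endpoint. The port (7860, the usual Hugging Face Spaces default) is an assumption, and only the "filename" field is visible in the hunk; the other keys of each entry are cut off by the diff:

    import requests

    # Hypothetical call; host/port and full response shape are assumptions.
    resp = requests.get("http://localhost:7860/api/models")
    for model in resp.json():
        print(model["filename"])  # only key visible in the diff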
@@ -91,9 +92,21 @@ async def chat(request: Request):
     llm = get_model(model_file)
 
     def iter_response():
-        #
-
-
+        # --- PROMPT ENGINEERING FOR ACCURACY ---
+        # Qwen 2.5 specific format for best results
+        prompt = f"""<|im_start|>system
+You are Hannah 1.0, an intelligent, fast, and helpful pilot assistant. Answer efficiently.<|im_end|>
+<|im_start|>user
+{user_input}<|im_end|>
+<|im_start|>assistant
+"""
+        # Stream response
+        stream = llm(
+            prompt,
+            max_tokens=2048,
+            stop=["<|im_end|>", "User:", "System:"],  # Stop exactly when done
+            stream=True
+        )
         for output in stream:
             yield json.dumps({"text": output['choices'][0]['text']}) + "\n"
 
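
Each chunk is emitted as a standalone JSON object terminated by a newline (NDJSON), so a client can decode the stream line by line instead of buffering the whole reply. A minimal consumer sketch — the /api/chat path and the request body keys ("prompt", "model") are assumptions, since the hunk only shows the response framing:

    import json
    import requests

    # Hypothetical endpoint and request shape; the diff only guarantees the
    # NDJSON response framing: one {"text": ...} object per line.
    with requests.post(
        "http://localhost:7860/api/chat",
        json={"prompt": "What is VFR?",
              "model": "qwen2.5-0.5b-instruct-q2_k.gguf"},
        stream=True,
    ) as resp:
        for line in resp.iter_lines():
            if line:
                print(json.loads(line)["text"], end="", flush=True)

The explicit stop strings in the diff matter here: without "<|im_end|>" the model would keep generating past the end of its turn and the stream would not terminate cleanly.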