Update app.py
app.py CHANGED
@@ -2,12 +2,14 @@ import os
 import glob
 import json
 import psutil
+from typing import Any, Dict, List, Optional
+
 from fastapi import FastAPI, Request, HTTPException
 from fastapi.responses import StreamingResponse
 from fastapi.middleware.cors import CORSMiddleware
 from llama_cpp import Llama
 
-app = FastAPI()
+app = FastAPI(title="Hannah Pilot Interface")
 
 # --- CORS Permissions ---
 app.add_middleware(
@@ -20,94 +22,138 @@ app.add_middleware(
 
 # --- Configuration ---
 # Map filenames to "Hannah" names
-MODEL_MAP = {
+MODEL_MAP: Dict[str, str] = {
     "qwen2.5-0.5b-instruct-q2_k.gguf": "Hannah-1.0 Light",
-    "qwen2.5-0.5b-instruct-q4_k_m.gguf": "Hannah-1.0 Heavy"
+    "qwen2.5-0.5b-instruct-q4_k_m.gguf": "Hannah-1.0 Heavy",
 }
 
-current_model = None
-current_model_name = ""
+current_model: Optional[Llama] = None
+current_model_name: str = ""
+
 
-def get_model(model_name):
+def get_model(model_name: str) -> Llama:
     global current_model, current_model_name
-
-    if not model_name:
-        raise HTTPException(status_code=400, detail="No model selected")
+
+    if not model_name:
+        raise HTTPException(status_code=400, detail="No model selected")
+    if not os.path.exists(model_name):
+        raise HTTPException(status_code=404, detail="Model file not found")
 
     if current_model_name == model_name and current_model is not None:
         return current_model
 
     print(f"Loading {model_name}...")
-    if current_model is not None:
-        del current_model
-
+    if current_model is not None:
+        del current_model
+
+    # --- PERFORMANCE TUNING (HF Free CPU) ---
     current_model = Llama(
         model_path=model_name,
-        n_ctx=4096,
-        n_threads=2,
-        n_batch=512,
-        verbose=False
+        n_ctx=4096,
+        n_threads=2,
+        n_batch=512,
+        verbose=False,
     )
     current_model_name = model_name
     return current_model
 
+
+@app.get("/")
+async def root():
+    return {"status": "ok", "name": "Hannah-1.0"}
+
+
 @app.get("/api/models")
 async def list_models():
-    models_info = []
-    # Scan for .gguf files
+    models_info: List[Dict[str, Any]] = []
     for f in glob.glob("*.gguf"):
-        display_name = MODEL_MAP.get(f, f)
+        display_name = MODEL_MAP.get(f, f)
         size_mb = os.path.getsize(f) / (1024 * 1024)
-        models_info.append(
-            {"filename": f,
-             "display_name": display_name,
-             "size": f"{size_mb:.1f} MB"}
-        )
+        models_info.append(
+            {
+                "filename": f,
+                "display_name": display_name,
+                "size": f"{size_mb:.1f} MB",
+            }
+        )
+
+    # Stable ordering (Heavy first if present)
+    models_info.sort(
+        key=lambda x: (
+            "Heavy" not in x.get("display_name", ""),
+            x.get("display_name", ""),
+        )
+    )
     return {"models": models_info}
 
+
 @app.get("/api/status")
 async def system_status():
     ram = psutil.virtual_memory()
     return {
-        "ram_used": f"{ram.used / (1024*1024):.0f} MB",
-        "cpu": f"{psutil.cpu_percent()}%"
+        "ram_used": f"{ram.used / (1024 * 1024):.0f} MB",
+        "cpu": f"{psutil.cpu_percent()}%",
     }
 
+
 @app.post("/api/gen_title")
 async def gen_title(request: Request):
     try:
         data = await request.json()
-        message = data.get("message")
+        message = (data.get("message") or "").strip()
         words = message.split()[:4]
-        title = " ".join(words).capitalize() + "..."
-        return {"title": title}
-    except: return {"title": "New Chat"}
+        title = " ".join(words).capitalize() + ("..." if words else "")
+        return {"title": title or "New Chat"}
+    except Exception:
+        return {"title": "New Chat"}
+
+
+def build_prompt(user_input: str, history: List[Dict[str, str]]) -> str:
+    # Qwen 2.5 chat format
+    system = (
+        "You are Hannah 1.0, an intelligent, fast, and helpful pilot assistant. "
+        "Answer efficiently and clearly."
+    )
+
+    parts: List[str] = ["<|im_start|>system\n" + system + "<|im_end|>\n"]
+
+    # Keep a small window of history for speed
+    for msg in history[-12:]:
+        role = msg.get("role")
+        content = msg.get("content") or ""
+        if role not in ("user", "assistant"):
+            continue
+        parts.append(f"<|im_start|>{role}\n{content}<|im_end|>\n")
+
+    parts.append(f"<|im_start|>user\n{user_input}<|im_end|>\n<|im_start|>assistant\n")
+    return "".join(parts)
+
 
 @app.post("/api/chat")
 async def chat(request: Request):
     data = await request.json()
-    user_input = data.get("message")
+    user_input = (data.get("message") or "").strip()
     model_file = data.get("model")
+    history = data.get("history") or []
+
+    if not user_input:
+        raise HTTPException(status_code=400, detail="Empty message")
 
     llm = get_model(model_file)
 
     def iter_response():
-
-
-        prompt = f"""<|im_start|>system
-You are Hannah 1.0, an intelligent, fast, and helpful pilot assistant. Answer efficiently.<|im_end|>
-<|im_start|>user
-{user_input}<|im_end|>
-<|im_start|>assistant
-"""
-        # Stream response
+        prompt = build_prompt(user_input, history)
+
         stream = llm(
-            prompt,
-            max_tokens=2048,
-            stop=["<|im_end|>", "User:", "System:"],
-            stream=True
+            prompt,
+            max_tokens=2048,
+            stop=["<|im_end|>", "User:", "System:"],
+            stream=True,
         )
+
         for output in stream:
-            yield output["choices"][0]["text"]
+            token_text = output["choices"][0]["text"]
+            yield json.dumps({"text": token_text}) + "\n"
 
-    return StreamingResponse(iter_response())
+    # NDJSON stream (frontend splits by newlines)
+    return StreamingResponse(iter_response(), media_type="application/x-ndjson")