Update app.py
app.py
CHANGED
@@ -1,104 +1,92 @@
 import os
 import glob
 import json
-import
 from fastapi import FastAPI, Request, HTTPException
-from fastapi.responses import StreamingResponse
-from fastapi.staticfiles import StaticFiles
-from fastapi.templating import Jinja2Templates
 from llama_cpp import Llama

 app = FastAPI()

-
-
 current_model = None
 current_model_name = ""

-# Serve static files
-app.mount("/static", StaticFiles(directory="static"), name="static")
-templates = Jinja2Templates(directory="templates")
-
-# --- Model Logic ---
 def get_model(model_name):
     global current_model, current_model_name

-    if
-
-
-    if current_model_name == model_name and current_model is not None:
-        return current_model

-
-    try:
-        # Unload previous model to free RAM
-        if current_model is not None:
-            del current_model
-
-        # Load new model (Optimized for Free Tier)
-        current_model = Llama(
-            model_path=model_name,
-            n_ctx=2048,       # Context window
-            n_threads=2,      # CPU threads (Free tier limit)
-            n_batch=512,
-            verbose=False
-        )
-        current_model_name = model_name
         return current_model
-    except Exception as e:
-        print(f"Load Error: {e}")
-        raise HTTPException(status_code=500, detail=f"Failed to load {model_name}")
-
-# --- Routes ---

-
-
-

 @app.get("/api/models")
 async def list_models():
-    #
-    models =
     return {"models": models}

 @app.post("/api/chat")
 async def chat(request: Request):
     data = await request.json()
     user_input = data.get("message")
     model_file = data.get("model")
-    history = data.get("history", [])  # Receive conversation history if needed

     llm = get_model(model_file)

-    # Stream Generator
     def iter_response():
-
-        prompt = f"""<|im_start|>system
-You are Hannah, a highly intelligent and helpful AI assistant similar to Gemini and ChatGPT.
-<|im_end|>
-<|im_start|>user
-{user_input}<|im_end|>
-<|im_start|>assistant
-"""
-        stream = llm(
-            prompt,
-            max_tokens=1024,
-            stop=["<|im_end|>", "User:", "System:"],
-            stream=True,
-            temperature=0.7
-        )
         for output in stream:
-            text = output['choices'][0]['text']
-            yield json.dumps({"text": text}) + "\n"

-    return StreamingResponse(iter_response(), media_type="application/x-ndjson")
-
-@app.post("/api/gen_title")
-async def gen_title(request: Request):
-    # Simple logic to generate a 3-4 word title from the first message
-    data = await request.json()
-    message = data.get("message", "")
-    # In a real app, we'd ask the AI to summarize this. For speed:
-    words = message.split()[:4]
-    title = " ".join(words).capitalize() + "..."
-    return {"title": title}
 import os
 import glob
 import json
+import psutil  # Added to check system health
 from fastapi import FastAPI, Request, HTTPException
+from fastapi.responses import StreamingResponse, JSONResponse
+from fastapi.middleware.cors import CORSMiddleware
 from llama_cpp import Llama

 app = FastAPI()

+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+# --- Config ---
 current_model = None
 current_model_name = ""

 def get_model(model_name):
     global current_model, current_model_name
+    if not model_name: raise HTTPException(status_code=400, detail="No model selected")

+    # Check if file actually exists
+    if not os.path.exists(model_name):
+        raise HTTPException(status_code=404, detail=f"Model file {model_name} not found inside Space.")

+    if current_model_name == model_name and current_model is not None:
         return current_model

+    print(f"Loading {model_name}...")
+    if current_model is not None: del current_model
+
+    # Optimized for < 1GB models
+    current_model = Llama(
+        model_path=model_name,
+        n_ctx=4096,      # High context window
+        n_threads=2,     # Free Tier Max
+        n_batch=1024,
+        verbose=False
+    )
+    current_model_name = model_name
+    return current_model

 @app.get("/api/models")
 async def list_models():
+    # Returns file size and name for the table
+    models = []
+    for f in glob.glob("*.gguf"):
+        size_mb = os.path.getsize(f) / (1024 * 1024)
+        models.append({"name": f, "size": f"{size_mb:.1f} MB"})
     return {"models": models}

+@app.get("/api/status")
+async def system_status():
+    # Helper to show RAM usage in the table
+    ram = psutil.virtual_memory()
+    return {
+        "ram_used": f"{ram.used / (1024*1024):.0f} MB",
+        "ram_total": f"{ram.total / (1024*1024):.0f} MB",
+        "cpu": f"{psutil.cpu_percent()}%"
+    }
+
+@app.post("/api/gen_title")
+async def gen_title(request: Request):
+    try:
+        data = await request.json()
+        message = data.get("message", "")
+        words = message.split()[:4]
+        title = " ".join(words).capitalize() + "..."
+        return {"title": title}
+    except: return {"title": "New Chat"}
+
 @app.post("/api/chat")
 async def chat(request: Request):
     data = await request.json()
     user_input = data.get("message")
     model_file = data.get("model")

     llm = get_model(model_file)

     def iter_response():
+        prompt = f"<|im_start|>system\nYou are Hannah 1.0, an intelligent pilot assistant.<|im_end|>\n<|im_start|>user\n{user_input}<|im_end|>\n<|im_start|>assistant\n"
+        stream = llm(prompt, max_tokens=2048, stop=["<|im_end|>"], stream=True)
         for output in stream:
+            yield json.dumps({"text": output['choices'][0]['text']}) + "\n"

+    return StreamingResponse(iter_response(), media_type="application/x-ndjson")
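A practical note on the new import: psutil and llama_cpp are not in a bare Python image, so the Space has to install them itself. Assuming dependencies come from a requirements.txt (that file is not part of this commit, so its exact contents are an assumption), it would need at least fastapi, an ASGI server such as uvicorn, llama-cpp-python, and psutil. The jinja2/static-file dependencies from the old version are no longer needed, since the StaticFiles mount and Jinja2Templates were removed.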
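A quick way to sanity-check the two new GET endpoints from outside the Space. This is a sketch, not part of the commit; the base URL, the example model filename, and the example numbers are placeholders:

import requests

BASE = "https://YOUR-SPACE.hf.space"  # placeholder: your Space's public URL

# /api/models lists every *.gguf file in the working directory,
# e.g. {"models": [{"name": "tinyllama-q4.gguf", "size": "460.0 MB"}]}
print(requests.get(f"{BASE}/api/models").json())

# /api/status reports RAM and CPU via psutil,
# e.g. {"ram_used": "912 MB", "ram_total": "16384 MB", "cpu": "3.0%"}
print(requests.get(f"{BASE}/api/status").json())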
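Since /api/chat streams newline-delimited JSON (one {"text": ...} object per line), a client has to read the response line by line instead of waiting for a single JSON body. A minimal consumer sketch, again with a placeholder URL and a model filename that should match one returned by /api/models:

import json
import requests

BASE = "https://YOUR-SPACE.hf.space"  # placeholder

payload = {"message": "Explain how trim tabs work.", "model": "tinyllama-q4.gguf"}
with requests.post(f"{BASE}/api/chat", json=payload, stream=True) as r:
    r.raise_for_status()
    for line in r.iter_lines():  # one JSON object per line (ndjson)
        if line:
            print(json.loads(line)["text"], end="", flush=True)
print()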