fugthchat committed on
Commit
842bd56
·
1 Parent(s): 3f3c728

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +104 -0
app.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import glob
3
+ import json
4
+ import uuid
5
+ from fastapi import FastAPI, Request, HTTPException
6
+ from fastapi.responses import HTMLResponse, StreamingResponse
7
+ from fastapi.staticfiles import StaticFiles
8
+ from fastapi.templating import Jinja2Templates
9
+ from llama_cpp import Llama
10
+
11
+ app = FastAPI()
12
+
13
+ # --- Configuration ---
14
+ MODEL_DIR = "." # Looks for models in the root
15
+ current_model = None
16
+ current_model_name = ""
17
+
18
+ # Serve static files
19
+ app.mount("/static", StaticFiles(directory="static"), name="static")
20
+ templates = Jinja2Templates(directory="templates")
21
+
22
+ # --- Model Logic ---
23
+ def get_model(model_name):
24
+ global current_model, current_model_name
25
+
26
+ if not model_name:
27
+ raise HTTPException(status_code=400, detail="No model selected")
28
+
29
+ if current_model_name == model_name and current_model is not None:
30
+ return current_model
31
+
32
+ print(f"Loading new model: {model_name}...")
33
+ try:
34
+ # Unload previous model to free RAM
35
+ if current_model is not None:
36
+ del current_model
37
+
38
+ # Load new model (Optimized for Free Tier)
39
+ current_model = Llama(
40
+ model_path=model_name,
41
+ n_ctx=2048, # Context window
42
+ n_threads=2, # CPU threads (Free tier limit)
43
+ n_batch=512,
44
+ verbose=False
45
+ )
46
+ current_model_name = model_name
47
+ return current_model
48
+ except Exception as e:
49
+ print(f"Load Error: {e}")
50
+ raise HTTPException(status_code=500, detail=f"Failed to load {model_name}")
51
+
52
+ # --- Routes ---
53
+
54
+ @app.get("/", response_class=HTMLResponse)
55
+ async def read_root(request: Request):
56
+ return templates.TemplateResponse("index.html", {"request": request})
57
+
58
+ @app.get("/api/models")
59
+ async def list_models():
60
+ # Scans for .gguf files
61
+ models = glob.glob("*.gguf")
62
+ return {"models": models}
63
+
64
+ @app.post("/api/chat")
65
+ async def chat(request: Request):
66
+ data = await request.json()
67
+ user_input = data.get("message")
68
+ model_file = data.get("model")
69
+ history = data.get("history", []) # Receive conversation history if needed
70
+
71
+ llm = get_model(model_file)
72
+
73
+ # Stream Generator
74
+ def iter_response():
75
+ # System Prompt for Hannah
76
+ prompt = f"""<|im_start|>system
77
+ You are Hannah, a highly intelligent and helpful AI assistant similar to Gemini and ChatGPT.
78
+ <|im_end|>
79
+ <|im_start|>user
80
+ {user_input}<|im_end|>
81
+ <|im_start|>assistant
82
+ """
83
+ stream = llm(
84
+ prompt,
85
+ max_tokens=1024,
86
+ stop=["<|im_end|>", "User:", "System:"],
87
+ stream=True,
88
+ temperature=0.7
89
+ )
90
+ for output in stream:
91
+ text = output['choices'][0]['text']
92
+ yield json.dumps({"text": text}) + "\n"
93
+
94
+ return StreamingResponse(iter_response(), media_type="application/x-ndjson")
95
+
96
+ @app.post("/api/gen_title")
97
+ async def gen_title(request: Request):
98
+ # Simple logic to generate a 3-4 word title from the first message
99
+ data = await request.json()
100
+ message = data.get("message", "")
101
+ # In a real app, we'd ask the AI to summarize this. For speed:
102
+ words = message.split()[:4]
103
+ title = " ".join(words).capitalize() + "..."
104
+ return {"title": title}