fugthchat commited on
Commit
d3a8e9d
·
1 Parent(s): 53f9c70

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -71
app.py CHANGED
@@ -1,104 +1,92 @@
1
  import os
2
  import glob
3
  import json
4
- import uuid
5
  from fastapi import FastAPI, Request, HTTPException
6
- from fastapi.responses import HTMLResponse, StreamingResponse
7
- from fastapi.staticfiles import StaticFiles
8
- from fastapi.templating import Jinja2Templates
9
  from llama_cpp import Llama
10
 
11
  app = FastAPI()
12
 
13
- # --- Configuration ---
14
- MODEL_DIR = "." # Looks for models in the root
 
 
 
 
 
 
 
15
  current_model = None
16
  current_model_name = ""
17
 
18
- # Serve static files
19
- app.mount("/static", StaticFiles(directory="static"), name="static")
20
- templates = Jinja2Templates(directory="templates")
21
-
22
- # --- Model Logic ---
23
  def get_model(model_name):
24
  global current_model, current_model_name
 
25
 
26
- if not model_name:
27
- raise HTTPException(status_code=400, detail="No model selected")
28
-
29
- if current_model_name == model_name and current_model is not None:
30
- return current_model
31
 
32
- print(f"Loading new model: {model_name}...")
33
- try:
34
- # Unload previous model to free RAM
35
- if current_model is not None:
36
- del current_model
37
-
38
- # Load new model (Optimized for Free Tier)
39
- current_model = Llama(
40
- model_path=model_name,
41
- n_ctx=2048, # Context window
42
- n_threads=2, # CPU threads (Free tier limit)
43
- n_batch=512,
44
- verbose=False
45
- )
46
- current_model_name = model_name
47
  return current_model
48
- except Exception as e:
49
- print(f"Load Error: {e}")
50
- raise HTTPException(status_code=500, detail=f"Failed to load {model_name}")
51
-
52
- # --- Routes ---
53
 
54
- @app.get("/", response_class=HTMLResponse)
55
- async def read_root(request: Request):
56
- return templates.TemplateResponse("index.html", {"request": request})
 
 
 
 
 
 
 
 
 
 
57
 
58
  @app.get("/api/models")
59
  async def list_models():
60
- # Scans for .gguf files
61
- models = glob.glob("*.gguf")
 
 
 
62
  return {"models": models}
63
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  @app.post("/api/chat")
65
  async def chat(request: Request):
66
  data = await request.json()
67
  user_input = data.get("message")
68
  model_file = data.get("model")
69
- history = data.get("history", []) # Receive conversation history if needed
70
 
71
  llm = get_model(model_file)
72
 
73
- # Stream Generator
74
  def iter_response():
75
- # System Prompt for Hannah
76
- prompt = f"""<|im_start|>system
77
- You are Hannah, a highly intelligent and helpful AI assistant similar to Gemini and ChatGPT.
78
- <|im_end|>
79
- <|im_start|>user
80
- {user_input}<|im_end|>
81
- <|im_start|>assistant
82
- """
83
- stream = llm(
84
- prompt,
85
- max_tokens=1024,
86
- stop=["<|im_end|>", "User:", "System:"],
87
- stream=True,
88
- temperature=0.7
89
- )
90
  for output in stream:
91
- text = output['choices'][0]['text']
92
- yield json.dumps({"text": text}) + "\n"
93
 
94
- return StreamingResponse(iter_response(), media_type="application/x-ndjson")
95
-
96
- @app.post("/api/gen_title")
97
- async def gen_title(request: Request):
98
- # Simple logic to generate a 3-4 word title from the first message
99
- data = await request.json()
100
- message = data.get("message", "")
101
- # In a real app, we'd ask the AI to summarize this. For speed:
102
- words = message.split()[:4]
103
- title = " ".join(words).capitalize() + "..."
104
- return {"title": title}
 
1
import os
import glob
import json
import psutil # Added to check system health
from fastapi import FastAPI, Request, HTTPException
from fastapi.responses import StreamingResponse, JSONResponse
from fastapi.middleware.cors import CORSMiddleware
from llama_cpp import Llama

app = FastAPI()

# Allow browser clients on any origin to call the API (the chat UI may be
# served from a different host than this Space).
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# --- Config ---
# Single-slot cache for the currently loaded llama.cpp model; managed
# exclusively by get_model() below to stay inside the Space's RAM budget.
current_model = None
current_model_name = ""
23
 
 
 
 
 
 
24
def get_model(model_name):
    """Return a loaded Llama model for *model_name*, loading it on demand.

    Keeps exactly one model resident (module globals ``current_model`` /
    ``current_model_name``) and swaps it when a different .gguf file is
    requested, to fit the Free Tier RAM budget.

    Raises:
        HTTPException: 400 if no model name was given, 404 if the file is
            missing, 500 if llama.cpp fails to load it.
    """
    global current_model, current_model_name

    if not model_name:
        raise HTTPException(status_code=400, detail="No model selected")

    # Check if file actually exists
    if not os.path.exists(model_name):
        raise HTTPException(status_code=404, detail=f"Model file {model_name} not found inside Space.")

    # Cache hit: the requested model is already resident.
    if current_model_name == model_name and current_model is not None:
        return current_model

    print(f"Loading {model_name}...")
    # Unload the previous model first to free RAM. Rebind the globals to a
    # consistent empty state before loading: a bare `del current_model` would
    # unbind the global, so a failed load below would leave every later
    # request crashing with NameError instead of a clean HTTP error.
    if current_model is not None:
        del current_model
    current_model = None
    current_model_name = ""

    try:
        # Optimized for < 1GB models
        current_model = Llama(
            model_path=model_name,
            n_ctx=4096,    # High context window
            n_threads=2,   # Free Tier Max
            n_batch=1024,
            verbose=False
        )
    except Exception as e:
        # Surface load failures as a 500 instead of an unhandled traceback.
        print(f"Load Error: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to load {model_name}")

    current_model_name = model_name
    return current_model
48
 
49
@app.get("/api/models")
async def list_models():
    """List every *.gguf file in the working directory, with its size
    formatted for display in the model table."""
    found = [
        {"name": path, "size": f"{os.path.getsize(path) / (1024 * 1024):.1f} MB"}
        for path in glob.glob("*.gguf")
    ]
    return {"models": found}
57
 
58
@app.get("/api/status")
async def system_status():
    """Report current RAM and CPU usage for the status table in the UI."""
    mem = psutil.virtual_memory()
    megabyte = 1024 * 1024
    return {
        "ram_used": f"{mem.used / megabyte:.0f} MB",
        "ram_total": f"{mem.total / megabyte:.0f} MB",
        "cpu": f"{psutil.cpu_percent()}%",
    }
67
+
68
@app.post("/api/gen_title")
async def gen_title(request: Request):
    """Derive a short chat title from the first user message.

    Takes the first four words of ``message`` in the JSON body and
    capitalizes them; falls back to "New Chat" on empty or invalid input.
    """
    try:
        data = await request.json()
        message = data.get("message", "")
        words = message.split()[:4]
        if not words:
            # An empty message would otherwise yield the useless title "...".
            return {"title": "New Chat"}
        title = " ".join(words).capitalize() + "..."
        return {"title": title}
    except Exception:
        # Narrowed from a bare `except:`, which in an async handler would
        # also swallow asyncio.CancelledError on client disconnect.
        return {"title": "New Chat"}
77
+
78
@app.post("/api/chat")
async def chat(request: Request):
    """Stream a model completion for the posted message as NDJSON lines."""
    payload = await request.json()
    message = payload.get("message")
    llm = get_model(payload.get("model"))

    def token_lines():
        # ChatML-style prompt wrapping the user's message.
        prompt = (
            f"<|im_start|>system\nYou are Hannah 1.0, an intelligent pilot assistant.<|im_end|>\n"
            f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
        )
        for chunk in llm(prompt, max_tokens=2048, stop=["<|im_end|>"], stream=True):
            yield json.dumps({"text": chunk["choices"][0]["text"]}) + "\n"

    return StreamingResponse(token_lines(), media_type="application/x-ndjson")