fugthchat committed on
Commit
46a2271
·
1 Parent(s): 919f957

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -17
app.py CHANGED
@@ -1,9 +1,9 @@
1
  import os
2
  import glob
3
  import json
4
- import psutil # Added to check system health
5
  from fastapi import FastAPI, Request, HTTPException
6
- from fastapi.responses import StreamingResponse, JSONResponse
7
  from fastapi.middleware.cors import CORSMiddleware
8
  from llama_cpp import Llama
9
 
@@ -17,17 +17,21 @@ app.add_middleware(
17
  allow_headers=["*"],
18
  )
19
 
20
- # --- Config ---
 
 
 
 
 
 
21
  current_model = None
22
  current_model_name = ""
23
 
24
  def get_model(model_name):
25
  global current_model, current_model_name
26
- if not model_name: raise HTTPException(status_code=400, detail="No model selected")
27
 
28
- # Check if file actually exists
29
- if not os.path.exists(model_name):
30
- raise HTTPException(status_code=404, detail=f"Model file {model_name} not found inside Space.")
31
 
32
  if current_model_name == model_name and current_model is not None:
33
  return current_model
@@ -35,12 +39,12 @@ def get_model(model_name):
35
  print(f"Loading {model_name}...")
36
  if current_model is not None: del current_model
37
 
38
- # Optimized for < 1GB models
39
  current_model = Llama(
40
  model_path=model_name,
41
- n_ctx=4096, # High context window
42
- n_threads=2, # Free Tier Max
43
- n_batch=1024,
44
  verbose=False
45
  )
46
  current_model_name = model_name
@@ -48,20 +52,23 @@ def get_model(model_name):
48
 
49
  @app.get("/api/models")
50
  async def list_models():
51
- # Returns file size and name for the table
52
- models = []
53
  for f in glob.glob("*.gguf"):
 
54
  size_mb = os.path.getsize(f) / (1024 * 1024)
55
- models.append({"name": f, "size": f"{size_mb:.1f} MB"})
56
- return {"models": models}
 
 
 
 
57
 
58
  @app.get("/api/status")
59
  async def system_status():
60
- # Helper to show RAM usage in the table
61
  ram = psutil.virtual_memory()
62
  return {
63
  "ram_used": f"{ram.used / (1024*1024):.0f} MB",
64
- "ram_total": f"{ram.total / (1024*1024):.0f} MB",
65
  "cpu": f"{psutil.cpu_percent()}%"
66
  }
67
 
@@ -84,6 +91,7 @@ async def chat(request: Request):
84
  llm = get_model(model_file)
85
 
86
  def iter_response():
 
87
  prompt = f"<|im_start|>system\nYou are Hannah 1.0, an intelligent pilot assistant.<|im_end|>\n<|im_start|>user\n{user_input}<|im_end|>\n<|im_start|>assistant\n"
88
  stream = llm(prompt, max_tokens=2048, stop=["<|im_end|>"], stream=True)
89
  for output in stream:
 
1
  import os
2
  import glob
3
  import json
4
+ import psutil
5
  from fastapi import FastAPI, Request, HTTPException
6
+ from fastapi.responses import StreamingResponse
7
  from fastapi.middleware.cors import CORSMiddleware
8
  from llama_cpp import Llama
9
 
 
17
  allow_headers=["*"],
18
  )
19
 
20
+ # --- Configuration ---
21
+ # Map the real filenames to your preferred names
22
+ MODEL_MAP = {
23
+ "qwen2.5-0.5b-instruct-q2_k.gguf": "Hannah-1.0 Light",
24
+ "qwen2.5-0.5b-instruct-q4_k_m.gguf": "Hannah-1.0 Heavy"
25
+ }
26
+
27
  current_model = None
28
  current_model_name = ""
29
 
30
  def get_model(model_name):
31
  global current_model, current_model_name
 
32
 
33
+ if not model_name: raise HTTPException(status_code=400, detail="No model selected")
34
+ if not os.path.exists(model_name): raise HTTPException(status_code=404, detail="Model file not found")
 
35
 
36
  if current_model_name == model_name and current_model is not None:
37
  return current_model
 
39
  print(f"Loading {model_name}...")
40
  if current_model is not None: del current_model
41
 
42
+ # Speed Optimization for 0.5B
43
  current_model = Llama(
44
  model_path=model_name,
45
+ n_ctx=4096,
46
+ n_threads=2,
47
+ n_batch=1024,
48
  verbose=False
49
  )
50
  current_model_name = model_name
 
52
 
53
@app.get("/api/models")
async def list_models():
    """List the GGUF model files available in the working directory.

    Returns:
        dict: ``{"models": [...]}`` where each entry has ``filename``
        (the on-disk name passed back to the chat endpoint),
        ``display_name`` (the friendly label from MODEL_MAP, falling
        back to the filename), and ``size`` (human-readable MB string).
    """
    models_info = []
    # glob returns matches in arbitrary, platform-dependent order; sort so
    # the model table is stable across requests.
    for f in sorted(glob.glob("*.gguf")):
        display_name = MODEL_MAP.get(f, f)  # custom name if mapped, else filename
        size_mb = os.path.getsize(f) / (1024 * 1024)
        models_info.append({
            "filename": f,
            "display_name": display_name,
            "size": f"{size_mb:.1f} MB",
        })
    return {"models": models_info}
66
 
67
@app.get("/api/status")
async def system_status():
    """Report host RAM and CPU usage for the status table.

    Returns:
        dict: ``ram_used``/``ram_total`` as "<n> MB" strings and ``cpu``
        as a percentage string.

    NOTE(review): the previous revision also returned ``ram_total`` and it
    was deleted with no replacement — that looks accidental, since used RAM
    is only meaningful next to the total. Restored here; adding a key is
    backward-compatible for JSON consumers.
    """
    ram = psutil.virtual_memory()
    return {
        "ram_used": f"{ram.used / (1024*1024):.0f} MB",
        "ram_total": f"{ram.total / (1024*1024):.0f} MB",
        "cpu": f"{psutil.cpu_percent()}%",
    }
74
 
 
91
  llm = get_model(model_file)
92
 
93
  def iter_response():
94
+ # Standard ChatML Prompt
95
  prompt = f"<|im_start|>system\nYou are Hannah 1.0, an intelligent pilot assistant.<|im_end|>\n<|im_start|>user\n{user_input}<|im_end|>\n<|im_start|>assistant\n"
96
  stream = llm(prompt, max_tokens=2048, stop=["<|im_end|>"], stream=True)
97
  for output in stream: