fugthchat committed on
Commit
3868f72
·
verified ·
1 Parent(s): c48eb55

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -12
app.py CHANGED
@@ -8,12 +8,9 @@ import requests
8
  from tqdm import tqdm
9
 
10
  # --- Configuration ---
11
- # This is the model the Space will download and use if it's not already present.
12
- # You can change this to any GGUF model compatible with llama-cpp-python.
13
  MODEL_NAME = "stablelm-zephyr-3b.Q3_K_S.gguf"
14
  MODEL_URL = f"https://huggingface.co/TheBloke/stablelm-zephyr-3b-GGUF/resolve/main/{MODEL_NAME}"
15
  MODEL_PATH = f"./{MODEL_NAME}"
16
- N_GPU_LAYERS = -1 # -1 to offload all layers to GPU, 0 for CPU only. Adjust if needed.
17
  N_CTX = 4096 # Context window size.
18
 
19
  # --- Model Download ---
@@ -38,46 +35,41 @@ def download_model():
38
  # --- FastAPI App ---
39
  app = FastAPI()
40
 
41
- # Add CORS middleware to allow requests from your website
42
  app.add_middleware(
43
  CORSMiddleware,
44
- allow_origins=["http://fugthdesign.space", "https://fugthdesign.space"], # Your website URL
45
  allow_credentials=True,
46
  allow_methods=["*"],
47
  allow_headers=["*"],
48
  )
49
 
50
- # Load the model on startup
51
  llm = None
52
  @app.on_event("startup")
53
  def load_llm():
54
  global llm
55
  if download_model():
56
  try:
57
- print("Loading GGUF model...")
58
  llm = Llama(
59
  model_path=MODEL_PATH,
60
  n_ctx=N_CTX,
61
- n_gpu_layers=N_GPU_LAYERS,
62
  verbose=True
63
  )
64
- print("Model loaded successfully!")
65
  except Exception as e:
66
  print(f"Error loading model: {e}")
67
 
68
- # Define the structure of the data your frontend will send
69
  class ChatRequest(BaseModel):
70
  userInput: str
71
  persona: str
72
  localKnowledge: str
73
 
74
- # --- API Endpoint ---
75
  @app.post("/chat")
76
  async def chat(request: ChatRequest):
77
  if not llm:
78
  return {"error": "Model is not loaded or failed to load."}
79
 
80
- # Construct the prompt using the data from the frontend
81
  system_prompt = request.persona or "You are a helpful AI assistant."
82
  knowledge_context = f"Use the following context to inform your answer:\n---CONTEXT---\n{request.localKnowledge}\n---END CONTEXT---" if request.localKnowledge else ""
83
 
 
8
  from tqdm import tqdm
9
 
10
  # --- Configuration ---
 
 
11
# GGUF quantization of StableLM Zephyr 3B, pulled from TheBloke's HF repo.
MODEL_NAME = "stablelm-zephyr-3b.Q3_K_S.gguf"
_REPO_BASE = "https://huggingface.co/TheBloke/stablelm-zephyr-3b-GGUF/resolve/main"
MODEL_URL = f"{_REPO_BASE}/{MODEL_NAME}"
# Downloaded next to the app so restarts of a warm container skip the download.
MODEL_PATH = f"./{MODEL_NAME}"
N_CTX = 4096  # Context window size (tokens) handed to llama.cpp.
15
 
16
  # --- Model Download ---
 
35
  # --- FastAPI App ---
36
app = FastAPI()

# CORS: only the fugthdesign.space frontend (http and https variants) may call
# this API from a browser; all methods/headers are accepted from those origins.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["http://fugthdesign.space", "https://fugthdesign.space"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
45
 
 
46
# Model handle; stays None until the startup hook loads it successfully,
# and the /chat endpoint checks for that.
llm = None


@app.on_event("startup")
def load_llm():
    """Fetch the GGUF model if needed, then load it into the global `llm`.

    Runs once at application startup. On download or load failure the error
    is printed and `llm` is left as None; the API surface stays up.
    """
    global llm
    if not download_model():
        # Download failed — leave llm unset rather than crashing startup.
        return
    try:
        print("Loading GGUF model for CPU...")
        llm = Llama(
            model_path=MODEL_PATH,
            n_ctx=N_CTX,
            n_gpu_layers=0,  # 0 layers offloaded → pure-CPU inference (the key change for CPU)
            verbose=True,
        )
        print("Model loaded successfully on CPU!")
    except Exception as e:
        print(f"Error loading model: {e}")
62
 
 
63
class ChatRequest(BaseModel):
    """Payload the frontend POSTs to /chat."""

    userInput: str       # the user's chat message
    persona: str         # system-prompt persona; falsy → default assistant prompt
    localKnowledge: str  # extra context text; falsy → no context block in the prompt
67
 
 
68
  @app.post("/chat")
69
  async def chat(request: ChatRequest):
70
  if not llm:
71
  return {"error": "Model is not loaded or failed to load."}
72
 
 
73
  system_prompt = request.persona or "You are a helpful AI assistant."
74
  knowledge_context = f"Use the following context to inform your answer:\n---CONTEXT---\n{request.localKnowledge}\n---END CONTEXT---" if request.localKnowledge else ""
75