Update app.py
app.py CHANGED
@@ -8,12 +8,9 @@ import requests
 from tqdm import tqdm
 
 # --- Configuration ---
-# This is the model the Space will download and use if it's not already present.
-# You can change this to any GGUF model compatible with llama-cpp-python.
 MODEL_NAME = "stablelm-zephyr-3b.Q3_K_S.gguf"
 MODEL_URL = f"https://huggingface.co/TheBloke/stablelm-zephyr-3b-GGUF/resolve/main/{MODEL_NAME}"
 MODEL_PATH = f"./{MODEL_NAME}"
-N_GPU_LAYERS = -1  # -1 to offload all layers to GPU, 0 for CPU only. Adjust if needed.
 N_CTX = 4096  # Context window size.
 
 # --- Model Download ---
@@ -38,46 +35,41 @@ def download_model():
 # --- FastAPI App ---
 app = FastAPI()
 
-# Add CORS middleware to allow requests from your website
 app.add_middleware(
     CORSMiddleware,
-    allow_origins=["http://fugthdesign.space", "https://fugthdesign.space"],
+    allow_origins=["http://fugthdesign.space", "https://fugthdesign.space"],
     allow_credentials=True,
     allow_methods=["*"],
     allow_headers=["*"],
 )
 
-# Load the model on startup
 llm = None
 @app.on_event("startup")
 def load_llm():
     global llm
     if download_model():
         try:
-            print("Loading GGUF model...")
+            print("Loading GGUF model for CPU...")
             llm = Llama(
                 model_path=MODEL_PATH,
                 n_ctx=N_CTX,
-                n_gpu_layers=N_GPU_LAYERS,
+                n_gpu_layers=0,  # ** THIS IS THE KEY CHANGE FOR CPU **
                 verbose=True
             )
-            print("Model loaded successfully!")
+            print("Model loaded successfully on CPU!")
         except Exception as e:
             print(f"Error loading model: {e}")
 
-# Define the structure of the data your frontend will send
 class ChatRequest(BaseModel):
     userInput: str
     persona: str
     localKnowledge: str
 
-# --- API Endpoint ---
 @app.post("/chat")
 async def chat(request: ChatRequest):
     if not llm:
         return {"error": "Model is not loaded or failed to load."}
 
-    # Construct the prompt using the data from the frontend
     system_prompt = request.persona or "You are a helpful AI assistant."
     knowledge_context = f"Use the following context to inform your answer:\n---CONTEXT---\n{request.localKnowledge}\n---END CONTEXT---" if request.localKnowledge else ""
 
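The diff shows only the signature of download_model() in the hunk header; its body is unchanged and outside the context lines. For orientation, a streaming requests-plus-tqdm downloader along the following lines is the usual shape of such a function. This is a hedged sketch reconstructed from the imports and constants visible above, not the committed implementation; the chunk size, timeout, and error handling are assumptions.

```python
import os

import requests
from tqdm import tqdm

MODEL_NAME = "stablelm-zephyr-3b.Q3_K_S.gguf"
MODEL_URL = f"https://huggingface.co/TheBloke/stablelm-zephyr-3b-GGUF/resolve/main/{MODEL_NAME}"
MODEL_PATH = f"./{MODEL_NAME}"


def download_model() -> bool:
    """Fetch the GGUF file if it is not already on disk; return True on success."""
    if os.path.exists(MODEL_PATH):
        print("Model already present, skipping download.")
        return True
    try:
        with requests.get(MODEL_URL, stream=True, timeout=30) as resp:
            resp.raise_for_status()
            total = int(resp.headers.get("content-length", 0))
            # Stream to disk in 1 MiB chunks, showing a progress bar.
            with open(MODEL_PATH, "wb") as f, tqdm(
                total=total, unit="B", unit_scale=True, desc=MODEL_NAME
            ) as bar:
                for chunk in resp.iter_content(chunk_size=1 << 20):
                    f.write(chunk)
                    bar.update(len(chunk))
        return True
    except requests.RequestException as e:
        print(f"Download failed: {e}")
        return False
```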
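With n_gpu_layers=0 the model now runs entirely on CPU, and the /chat endpoint takes the three ChatRequest fields shown in the diff (userInput, persona, localKnowledge). Below is a minimal smoke test against a locally running copy of the Space; the base URL and port 7860 (the Hugging Face Spaces default) are assumptions, not part of this commit.

```python
import requests

BASE_URL = "http://localhost:7860"  # assumption: local run on the default Spaces port

payload = {
    "userInput": "What services does fugthdesign.space offer?",
    "persona": "You are a helpful AI assistant.",
    "localKnowledge": "",  # empty string skips the ---CONTEXT--- block in the prompt
}

resp = requests.post(f"{BASE_URL}/chat", json=payload, timeout=120)
resp.raise_for_status()
# On success this prints the model's reply; if the model failed to load,
# the endpoint returns {"error": "Model is not loaded or failed to load."}.
print(resp.json())
```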