CooLLaMACEO committed on
Commit
8655754
·
verified ·
1 Parent(s): 414e142

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -28
app.py CHANGED
@@ -1,36 +1,39 @@
1
  from fastapi import FastAPI
2
  from pydantic import BaseModel
3
- from lmstudio import LLM # LM Studio Python interface
4
- from fastapi.middleware.cors import CORSMiddleware
5
-
6
- # FastAPI app
7
- app = FastAPI(title="ChatGPT OS 1.0", description="Local AI Chat API", version="1.0")
8
-
9
- # Enable CORS so frontend can connect from any origin
10
- app.add_middleware(
11
- CORSMiddleware,
12
- allow_origins=["*"],
13
- allow_methods=["*"],
14
- allow_headers=["*"]
 
 
 
15
  )
16
 
17
- # Load the GPT-OSS model
18
- model_path = "./models/gpt-oss-20b-Q3_K_M.gguf"
19
- llm = LLM(model_path=model_path, context_length=16384, flash_attention=True)
20
 
21
- # Request body schema
22
  class ChatRequest(BaseModel):
23
- message: str
24
 
25
- # Chat endpoint
26
  @app.post("/chat")
27
- async def chat(req: ChatRequest):
28
- user_message = req.message
29
- # Generate response from the model
30
- response = llm.generate(user_message, max_new_tokens=256)
31
- return {"response": response}
32
-
33
- # Optional: health check
34
- @app.get("/health")
35
- def health():
36
- return {"status": "ok", "model_loaded": True}
 
 
 
 
 
from fastapi import FastAPI
from pydantic import BaseModel
from llama_cpp import Llama
import os

# FastAPI application serving a local llama.cpp chat model.
app = FastAPI()

# Model path is overridable via the MODEL_PATH env var; the default is the
# same hard-coded path the app shipped with, so existing deployments keep working.
MODEL_PATH = os.getenv("MODEL_PATH", "./models/gpt-oss-20b-Q3_K_M.gguf")

print("🔄 Loading model… this may take a while")

# Load the quantized GGUF model once, at module import time. The first request
# therefore never pays the load cost, but process startup is slow.
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=16384,                # context window (tokens)
    n_threads=os.cpu_count(),   # use every available CPU core
    n_gpu_layers=0,             # HF CPU-only unless paid GPU
    verbose=False,
)

print("✅ Model loaded successfully")
22
class ChatRequest(BaseModel):
    """Request body schema for the POST /chat endpoint."""

    # The raw user message to send to the model.
    prompt: str
24
 
 
25
@app.post("/chat")
def chat(req: ChatRequest):
    """Run one chat turn: wrap the prompt, generate, and return the reply.

    The prompt is framed as a "User: … / Assistant:" exchange and generation
    halts when the model starts a new "User:" turn.
    """
    framed_prompt = f"User: {req.prompt}\nAssistant:"
    result = llm(framed_prompt, max_tokens=512, stop=["User:"])
    # llama_cpp returns an OpenAI-style completion dict; take the first choice.
    reply = result["choices"][0]["text"].strip()
    return {"response": reply}
36
+
37
@app.get("/")
def root():
    """Landing/health endpoint confirming the service is up."""
    status_payload = {"status": "ChatGPT Open-Source 1.0 is running 🚀"}
    return status_payload