SarmaHighOnAI committed on
Commit
b4f7b60
·
verified ·
1 Parent(s): 1f1d778

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -0
app.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI
2
+ from pydantic import BaseModel
3
+ from llama_cpp import Llama
4
+ from huggingface_hub import hf_hub_download
5
+
6
+ app = FastAPI()
7
+
8
+ # 1. Define your specific model details
9
+ REPO_ID = "SarmaHighOnAI/physics-tutor-gguf"
10
+ FILENAME = "llama-3.2-3b-instruct.Q4_K_M.gguf"
11
+
12
+ print("Downloading your fine-tuned model...")
13
+ model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)
14
+
15
+ print("Loading model...")
16
+ # n_threads=2 ensures it runs smoothly on the free tier CPU
17
+ llm = Llama(model_path=model_path, n_ctx=2048, n_threads=2)
18
+
19
+ class Request(BaseModel):
20
+ prompt: str
21
+
22
+ @app.get("/")
23
+ def home():
24
+ return {"status": "Running", "message": "Your Fine-Tuned Physics API is Live!"}
25
+
26
+ @app.post("/generate")
27
+ def generate(request: Request):
28
+ # Standard prompt format for Llama 3
29
+ formatted_prompt = f"<|start_header_id|>user<|end_header_id|>\n\n{request.prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
30
+
31
+ output = llm(formatted_prompt, max_tokens=512, stop=["<|eot_id|>"], echo=False)
32
+ return {"response": output["choices"][0]["text"]}