SarmaHighOnAI committed on
Commit
3ebd60c
·
verified ·
1 Parent(s): b4f7b60

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -20
app.py CHANGED
@@ -1,32 +1,22 @@
1
  from fastapi import FastAPI
2
  from pydantic import BaseModel
3
- from llama_cpp import Llama
4
- from huggingface_hub import hf_hub_download
5
 
6
  app = FastAPI()
7
-
8
- # 1. Define your specific model details
9
- REPO_ID = "SarmaHighOnAI/physics-tutor-gguf"
10
- FILENAME = "llama-3.2-3b-instruct.Q4_K_M.gguf"
11
-
12
- print("Downloading your fine-tuned model...")
13
- model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)
14
-
15
- print("Loading model...")
16
- # n_threads=2 ensures it runs smoothly on the free tier CPU
17
- llm = Llama(model_path=model_path, n_ctx=2048, n_threads=2)
18
 
19
  class Request(BaseModel):
20
  prompt: str
21
 
22
  @app.get("/")
23
  def home():
24
- return {"status": "Running", "message": "Your Fine-Tuned Physics API is Live!"}
25
 
26
  @app.post("/generate")
27
- def generate(request: Request):
28
- # Standard prompt format for Llama 3
29
- formatted_prompt = f"<|start_header_id|>user<|end_header_id|>\n\n{request.prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
30
-
31
- output = llm(formatted_prompt, max_tokens=512, stop=["<|eot_id|>"], echo=False)
32
- return {"response": output["choices"][0]["text"]}
 
 
import os

from fastapi import FastAPI
from pydantic import BaseModel
from huggingface_hub import InferenceClient

app = FastAPI()

# Read the Hugging Face API token from the environment instead of hardcoding
# a secret in source control. When HF_TOKEN is unset this passes None, i.e.
# unauthenticated access (same shape of call as before, no caller changes).
client = InferenceClient(token=os.environ.get("HF_TOKEN"))
 
 
 
 
 
 
 
 
 
 
7
 
8
  class Request(BaseModel):
9
  prompt: str
10
 
@app.get("/")
def home():
    """Health-check endpoint: confirms the service is up."""
    payload = {"status": "Running"}
    return payload
14
 
@app.post("/generate")
def generate_text(request: Request):
    """Forward the prompt to the hosted Llama model and return its completion.

    Args:
        request: parsed JSON body carrying the user prompt.

    Returns:
        dict with a single "response" key holding the generated text.
    """
    # Generation settings kept in one place; identical values to before.
    generation_kwargs = {
        "model": "meta-llama/Llama-3.2-3B-Instruct",
        "max_new_tokens": 256,
    }
    completion = client.text_generation(request.prompt, **generation_kwargs)
    return {"response": completion}