Sdey10 committed on
Commit
e21e5ea
·
verified ·
1 Parent(s): 6f232c1

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +38 -0
app.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI
2
+ from pydantic import BaseModel
3
+ from transformers import AutoTokenizer, LlamaForCausalLM
4
+ import torch
5
+ import os
6
+
# Hugging Face Hub repository that holds both the tokenizer and the weights.
REPO_NAME = "Sdey10/My-500M-Mini-TUF"

# ASGI application object; the route handlers below register against it.
app = FastAPI(title="My 500M AI API")

print("Downloading Model from Hugging Face...")
# The repository is public, so no access token is passed (or hardcoded) here.
tokenizer = AutoTokenizer.from_pretrained(REPO_NAME)

# Free Hugging Face Spaces run on CPUs, so the model is pinned to the CPU.
# eval() switches the module to inference mode since this service never trains.
model = LlamaForCausalLM.from_pretrained(REPO_NAME).to("cpu").eval()
20
+
class PromptRequest(BaseModel):
    """JSON body accepted by the ``/generate`` endpoint."""

    # Text the model is asked to continue.
    prompt: str
    # Number of new tokens to generate; callers may omit it to get 50.
    max_tokens: int = 50
24
+
@app.post("/generate")
def generate_text(request: PromptRequest):
    """Generate a sampled continuation of ``request.prompt``.

    The prompt is tokenized, passed to ``model.generate`` with sampling
    enabled (temperature 0.7, repetition penalty 1.2) and the first returned
    sequence is decoded with special tokens stripped.

    Args:
        request: Parsed request body carrying ``prompt`` and ``max_tokens``.

    Returns:
        A dict ``{"response": <decoded text>}``.

    Raises:
        HTTPException: 422 when ``max_tokens`` is not positive or when the
            prompt tokenizes to zero tokens.
    """
    # Imported locally so this handler does not rely on changes to the
    # file-level import list.
    from fastapi import HTTPException

    # generate() rejects max_new_tokens <= 0 with a ValueError, which would
    # otherwise surface to the client as an opaque HTTP 500.
    if request.max_tokens < 1:
        raise HTTPException(
            status_code=422,
            detail="max_tokens must be a positive integer",
        )

    inputs = tokenizer(request.prompt, return_tensors="pt").to("cpu")

    # A prompt that yields no tokens gives the model nothing to continue
    # from; reject it instead of handing generate() an empty tensor.
    if inputs["input_ids"].shape[-1] == 0:
        raise HTTPException(
            status_code=422,
            detail="prompt must contain at least one token",
        )

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=request.max_tokens,
            temperature=0.7,
            do_sample=True,
            repetition_penalty=1.2,
        )

    response_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return {"response": response_text}