# My-500M-API / app.py
# Uploaded by Sdey10 via huggingface_hub (commit e21e5ea, verified)
from fastapi import FastAPI
from pydantic import BaseModel
from transformers import AutoTokenizer, LlamaForCausalLM
import torch
import os
# FastAPI application exposing the custom 500M-parameter model over HTTP.
app = FastAPI(title="My 500M AI API")

# Hugging Face Hub repository that holds the published model weights.
REPO_NAME = "Sdey10/My-500M-Mini-TUF"

print("Downloading Model from Hugging Face...")

# The repo is public, so no access token is needed for the download.
tokenizer = AutoTokenizer.from_pretrained(REPO_NAME)

# Free Hugging Face Spaces provide CPU-only hardware: keep the weights
# on the CPU and switch the network into inference (eval) mode.
model = LlamaForCausalLM.from_pretrained(REPO_NAME).to("cpu")
model.eval()
class PromptRequest(BaseModel):
    """Request body for the /generate endpoint."""

    # The text prompt the model should continue.
    prompt: str
    # Maximum number of new tokens to generate (defaults to 50).
    max_tokens: int = 50
@app.post("/generate")
def generate_text(request: PromptRequest):
    """Generate a text completion for the supplied prompt.

    Tokenizes ``request.prompt``, samples up to ``request.max_tokens``
    new tokens from the model, and returns the decoded text as JSON.
    """
    # Tokenize on the CPU — the model was placed on the CPU at startup.
    encoded = tokenizer(request.prompt, return_tensors="pt").to("cpu")

    # No gradients are needed for pure inference.
    with torch.no_grad():
        generated = model.generate(
            **encoded,
            max_new_tokens=request.max_tokens,
            temperature=0.7,         # mild randomness in sampling
            do_sample=True,
            repetition_penalty=1.2,  # discourage repetitive loops
        )

    # NOTE(review): decoding generated[0] yields the prompt AND the
    # completion together — callers receive both in one string.
    decoded = tokenizer.decode(generated[0], skip_special_tokens=True)
    return {"response": decoded}