# NOTE(review): the three lines below were Hugging Face Spaces UI status text
# ("Spaces: Sleeping") accidentally pasted into the source; preserved here as
# a comment so the file remains valid Python.
"""FastAPI inference server for the 500M-parameter Mini-TUF model.

Downloads the public model from the Hugging Face Hub at startup and
serves it on CPU (free Hugging Face Spaces have no GPU).
"""
from fastapi import FastAPI
from pydantic import BaseModel
from transformers import AutoTokenizer, LlamaForCausalLM
import torch
import os  # NOTE(review): currently unused here — confirm before removing

app = FastAPI(title="My 500M AI API")

# Pointing to the model you built.
REPO_NAME = "Sdey10/My-500M-Mini-TUF"

print("Downloading Model from Hugging Face...")

# The repo is public, so no auth token needs to be hardcoded.
tokenizer = AutoTokenizer.from_pretrained(REPO_NAME)
model = LlamaForCausalLM.from_pretrained(REPO_NAME)

# Free Hugging Face Spaces run on CPUs; pin the model there and switch to
# inference mode (disables dropout etc.).
model.to("cpu")
model.eval()
class PromptRequest(BaseModel):
    """Request body for text generation."""

    prompt: str           # seed text to continue
    max_tokens: int = 50  # maximum number of NEW tokens to generate
@app.post("/generate")  # FIX: handler was defined but never registered as a route
def generate_text(request: PromptRequest):
    """Generate a text completion for ``request.prompt``.

    Samples up to ``request.max_tokens`` new tokens (temperature 0.7,
    repetition penalty 1.2) and returns ``{"response": <text>}`` where the
    decoded text includes the original prompt with special tokens stripped.
    """
    # Tokenize on CPU to match the model's device.
    inputs = tokenizer(request.prompt, return_tensors="pt").to("cpu")

    # Inference only — no gradient bookkeeping needed.
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=request.max_tokens,
            temperature=0.7,
            do_sample=True,
            repetition_penalty=1.2,
        )

    response_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return {"response": response_text}