# FastAPI inference server for Gemma-3 (GGUF, via llama-cpp-python),
# intended to run on a Hugging Face Space.
# Standard library
import os
from contextlib import asynccontextmanager

# Third party
from fastapi import FastAPI, Request
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Global model handle; populated once by the lifespan startup hook.
llm = None
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Download and load the GGUF model once at application startup.

    FastAPI's ``lifespan=`` parameter requires an *async context manager*;
    the original code imported ``asynccontextmanager`` but never applied
    it, so the app would fail to start. Everything before ``yield`` runs
    at startup; nothing runs at shutdown (no cleanup needed for llama_cpp).
    """
    global llm
    print("Downloading Gemma-3 from Hub...")
    # hf_hub_download fetches into the local HF cache (or reuses it) and
    # returns the absolute path of the cached file.
    model_path = hf_hub_download(
        repo_id="mradermacher/gemma-3-4b-it-GGUF",
        filename="gemma-3-4b-it.Q4_K_M.gguf",
    )
    print("Loading model...")
    llm = Llama(
        model_path=model_path,
        n_ctx=2048,
        n_threads=2,  # Hugging Face free tier usually has 2 vCPUs
    )
    print("Ready!")
    yield


app = FastAPI(lifespan=lifespan)
@app.post("/completion")
async def completion(request: Request):
    """Generate a text completion for the JSON body's ``prompt`` field.

    Expects ``{"prompt": "..."}``; returns ``{"content": "..."}``.
    The route decorator was missing in the original, so the endpoint
    was never registered with the app.
    """
    data = await request.json()
    prompt = data.get("prompt", "")
    # Gemma chat template. The original used Llama-3 tokens
    # (<|begin_of_text|> / <|end_of_text|>), which Gemma models neither
    # expect nor emit; Gemma uses <start_of_turn>/<end_of_turn> markers.
    formatted_prompt = (
        f"<start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model\n"
    )
    output = llm(
        formatted_prompt,
        max_tokens=512,
        stop=["<end_of_turn>"],
    )
    return {"content": output["choices"][0]["text"]}
@app.get("/")
def home():
    """Health-check / landing endpoint.

    The route decorator was missing in the original, so ``/`` returned
    404 instead of this status message.
    """
    return {"message": "Gemma-3 API is running on Hugging Face"}
| if __name__ == "__main__": | |
| import uvicorn | |
| uvicorn.run(app, host="0.0.0.0", port=7860) |