CooLLaMACEO committed on
Commit
0c80cd5
·
verified ·
1 Parent(s): db5a668

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +43 -17
main.py CHANGED
@@ -1,25 +1,51 @@
1
- import uvicorn
2
- from fastapi import FastAPI
3
- import ollama
 
 
4
 
5
- app = FastAPI()
6
 
7
- MODEL_NAME = "gemma3" # Ollama will pull this automatically if told to
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
  @app.post("/completion")
10
- async def completion(prompt: str):
11
- # This calls the Ollama service running on your machine/server
12
- response = ollama.chat(model=MODEL_NAME, messages=[
13
- {'role': 'user', 'content': prompt},
14
- ])
15
- return {"content": response['message']['content']}
 
 
 
 
 
 
 
16
 
17
- @app.get("/health")
18
- def health():
19
- return {"status": "ready"}
20
 
21
  if __name__ == "__main__":
22
- # Ensure the model is downloaded before starting
23
- print(f"📦 Checking for model {MODEL_NAME}...")
24
- ollama.pull(MODEL_NAME)
25
  uvicorn.run(app, host="0.0.0.0", port=7860)
 
1
+ import os
2
+ from fastapi import FastAPI, Request
3
+ from llama_cpp import Llama
4
+ from huggingface_hub import hf_hub_download
5
+ from contextlib import asynccontextmanager
6
 
7
# Global handle to the loaded model; assigned once during startup.
llm = None

@asynccontextmanager
async def lifespan(app: FastAPI):
    """Fetch and load the GGUF model before the app begins serving requests."""
    global llm

    print("📥 Downloading Gemma-3 from Hub...")
    # hf_hub_download stores the file in the local HF cache and returns its path.
    gguf_path = hf_hub_download(
        repo_id="mradermacher/gemma-3-4b-it-GGUF",
        filename="gemma-3-4b-it.Q4_K_M.gguf",
    )

    print("🚀 Loading Model...")
    llm = Llama(
        model_path=gguf_path,
        n_ctx=2048,
        n_threads=2,  # Hugging Face free tier usually has 2 vCPUs
    )
    print("✅ Ready!")

    # Serve requests from here on; no shutdown cleanup is required.
    yield

app = FastAPI(lifespan=lifespan)
29
 
30
@app.post("/completion")
async def completion(request: Request):
    """Generate a completion for the request body's "prompt" field.

    Expects a JSON body like {"prompt": "..."} and returns
    {"content": "<generated text>"}.
    """
    data = await request.json()
    prompt = data.get("prompt", "")

    # Guard against requests that arrive before the lifespan hook has
    # finished loading the model (previously this raised and returned a 500).
    if llm is None:
        return {"content": "", "error": "Model is still loading; try again shortly."}

    # Gemma chat template. The previous tokens (<|begin_of_text|> /
    # <|end_of_text|>) belong to Llama-3, not Gemma; Gemma models expect
    # <start_of_turn>/<end_of_turn> turn markers. Also drop "user" from the
    # stop list — it truncated any output containing that word.
    formatted_prompt = (
        f"<start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model\n"
    )

    output = llm(
        formatted_prompt,
        max_tokens=512,
        stop=["<end_of_turn>"],
    )
    return {"content": output["choices"][0]["text"]}
44
 
45
@app.get("/")
def home():
    """Landing route used as a simple liveness check for the service."""
    greeting = {"message": "Gemma-3 API is running on Hugging Face"}
    return greeting
48
 
49
if __name__ == "__main__":
    # Direct-execution entry point. The deferred import keeps uvicorn out of
    # module scope when the app is launched by an external ASGI runner.
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)