Muglu committed
Commit · 0a3bf97
1 Parent(s): bac9742
apiExpose

Files changed:
- app.py           +79 -4
- requirements.txt  +2 -0
- start.sh          +2 -2
app.py
CHANGED
@@ -1,10 +1,14 @@
 """
-Gradio
-
+Gradio UI + FastAPI /api/generate so the Space serves both the web UI and the API.
+Your local app can call: GET/POST .../api/generate
 """
 import os
 import requests
 import gradio as gr
+from fastapi import FastAPI, Query, Body
+from fastapi.responses import JSONResponse
+from fastapi.middleware.cors import CORSMiddleware
+from pydantic import BaseModel
 
 OLLAMA_HOST = os.environ.get("OLLAMA_HOST", "http://127.0.0.1:11434")
 OLLAMA_MODEL = os.environ.get("OLLAMA_MODEL", "tinyllama")
@@ -25,11 +29,82 @@ def generate_text(prompt):
     except requests.RequestException as e:
         return f"Error: {e}"
 
+
+# Gradio app (mounted at /)
 demo = gr.Interface(
     fn=generate_text,
     inputs="text",
     outputs="text",
     title="Ollama on HF",
-    description="Generate text with Ollama.",
+    description="Generate text with Ollama. Call /api/generate from your local app.",
 )
-
+
+# FastAPI app: mount Gradio at / and expose /api/generate
+app = FastAPI(title="Ollama on HF")
+app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
+
+
+class GenerateBody(BaseModel):
+    prompt: str = ""
+    model: str = OLLAMA_MODEL
+
+
+@app.get("/api/generate")
+def api_generate_get(
+    prompt: str = Query(..., description="Text prompt"),
+    model: str = Query(OLLAMA_MODEL, description="Ollama model"),
+):
+    """GET /api/generate?prompt=your+prompt"""
+    return _do_generate((prompt or "").strip(), model)
+
+
+@app.post("/api/generate")
+def api_generate_post(body: GenerateBody = Body(...)):
+    """POST /api/generate with JSON {"prompt": "...", "model": "tinyllama"}"""
+    return _do_generate((body.prompt or "").strip(), body.model or OLLAMA_MODEL)
+
+
+def _do_generate(prompt: str, model: str):
+    """Shared logic for GET and POST /api/generate."""
+    if not prompt:
+        return JSONResponse(
+            status_code=400,
+            content={"error": "prompt is required and cannot be empty"},
+        )
+    payload = {"model": model, "prompt": prompt, "stream": False}
+    try:
+        r = requests.post(API_URL, json=payload, timeout=120)
+        r.raise_for_status()
+        data = r.json()
+        return {
+            "response": data.get("response", ""),
+            "model": data.get("model", model),
+            "done": data.get("done", True),
+        }
+    except requests.ConnectionError:
+        return JSONResponse(
+            status_code=503,
+            content={"error": "Ollama is not ready. Wait for the Space to finish loading."},
+        )
+    except requests.HTTPError as e:
+        return JSONResponse(
+            status_code=e.response.status_code,
+            content={"error": e.response.text or str(e)},
+        )
+
+
+@app.get("/api/status")
+def api_status():
+    """Check if Ollama is up and list models."""
+    try:
+        r = requests.get(f"{OLLAMA_HOST}/api/tags", timeout=5)
+        r.raise_for_status()
+        data = r.json()
+        models = [m.get("name", "") for m in data.get("models", [])]
+        return {"status": "ok", "ollama": "up", "models": models}
+    except Exception as e:
+        return {"status": "error", "ollama": "down", "models": [], "detail": str(e)}
+
+
+# Mount Gradio at root (must be last so /api/* are matched first)
+app = gr.mount_gradio_app(app, demo, path="/")
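
With this commit deployed, a local app can call the Space directly, as the new docstring says. A minimal client sketch; the base URL https://your-space.hf.space is a placeholder for your actual Space host:

import requests

SPACE_URL = "https://your-space.hf.space"  # placeholder; substitute your Space's URL

# GET form: prompt and model as query parameters
r = requests.get(
    f"{SPACE_URL}/api/generate",
    params={"prompt": "Say hello", "model": "tinyllama"},
    timeout=120,
)
print(r.json().get("response"))

# POST form: same endpoint, JSON body matching GenerateBody
r = requests.post(
    f"{SPACE_URL}/api/generate",
    json={"prompt": "Say hello", "model": "tinyllama"},
    timeout=120,
)
print(r.json().get("response"))

Both forms funnel into _do_generate and return the same JSON shape ("response", "model", "done"), so a client can switch between them freely.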
requirements.txt
CHANGED
@@ -1,2 +1,4 @@
+fastapi>=0.104.0
+uvicorn[standard]>=0.24.0
 gradio>=4.0.0
 requests>=2.28.0
start.sh
CHANGED
@@ -18,5 +18,5 @@ done
 # Pull default model if not present (tinyllama for free-tier CPU)
 ollama pull "${OLLAMA_MODEL:-tinyllama}" || true
 
-# Run
-exec
+# Run FastAPI + Gradio on 7860 (UI at /, API at /api/generate)
+exec uvicorn app:app --host 0.0.0.0 --port 7860
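
Because start.sh pulls the model before uvicorn starts, a cold Space can take a while before /api/generate answers. A minimal readiness-polling sketch against the new /api/status endpoint, reusing the placeholder base URL from above:

import time
import requests

SPACE_URL = "https://your-space.hf.space"  # placeholder; substitute your Space's URL

# Poll /api/status until Ollama reports "up" (or give up after ~5 minutes)
for _ in range(30):
    try:
        status = requests.get(f"{SPACE_URL}/api/status", timeout=10).json()
        if status.get("ollama") == "up":
            print("ready, models:", status.get("models"))
            break
    except requests.RequestException:
        pass  # Space still waking up; keep polling
    time.sleep(10)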