Muglu committed
Commit 0a3bf97 · 1 Parent(s): bac9742

apiExpose
Files changed (3):
  1. app.py +79 -4
  2. requirements.txt +2 -0
  3. start.sh +2 -2
app.py CHANGED
@@ -1,10 +1,14 @@
 """
-Gradio app that proxies generation to a local Ollama instance.
-Exposes a web UI and Gradio's API (e.g. /api/predict) for your local app.
+Gradio UI + FastAPI /api/generate so the Space serves both the web UI and the API.
+Your local app can call: GET/POST .../api/generate
 """
 import os
 import requests
 import gradio as gr
+from fastapi import FastAPI, Query, Body
+from fastapi.responses import JSONResponse
+from fastapi.middleware.cors import CORSMiddleware
+from pydantic import BaseModel
 
 OLLAMA_HOST = os.environ.get("OLLAMA_HOST", "http://127.0.0.1:11434")
 OLLAMA_MODEL = os.environ.get("OLLAMA_MODEL", "tinyllama")
@@ -25,11 +29,82 @@ def generate_text(prompt):
     except requests.RequestException as e:
         return f"Error: {e}"
 
+
+# Gradio app (mounted at /)
 demo = gr.Interface(
     fn=generate_text,
     inputs="text",
     outputs="text",
     title="Ollama on HF",
-    description="Generate text with Ollama. Use this UI or call the Gradio API from your local app.",
+    description="Generate text with Ollama. Call /api/generate from your local app.",
 )
-demo.launch(server_name="0.0.0.0", server_port=7860)
+
+# FastAPI app: mount Gradio at / and expose /api/generate
+app = FastAPI(title="Ollama on HF")
+app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
+
+
+class GenerateBody(BaseModel):
+    prompt: str = ""
+    model: str = OLLAMA_MODEL
+
+
+@app.get("/api/generate")
+def api_generate_get(
+    prompt: str = Query(..., description="Text prompt"),
+    model: str = Query(OLLAMA_MODEL, description="Ollama model"),
+):
+    """GET /api/generate?prompt=your+prompt"""
+    return _do_generate((prompt or "").strip(), model)
+
+
+@app.post("/api/generate")
+def api_generate_post(body: GenerateBody = Body(...)):
+    """POST /api/generate with JSON {"prompt": "...", "model": "tinyllama"}"""
+    return _do_generate((body.prompt or "").strip(), body.model or OLLAMA_MODEL)
+
+
+def _do_generate(prompt: str, model: str):
+    """Shared logic for GET and POST /api/generate."""
+    if not prompt:
+        return JSONResponse(
+            status_code=400,
+            content={"error": "prompt is required and cannot be empty"},
+        )
+    payload = {"model": model, "prompt": prompt, "stream": False}
+    try:
+        r = requests.post(API_URL, json=payload, timeout=120)
+        r.raise_for_status()
+        data = r.json()
+        return {
+            "response": data.get("response", ""),
+            "model": data.get("model", model),
+            "done": data.get("done", True),
+        }
+    except requests.ConnectionError:
+        return JSONResponse(
+            status_code=503,
+            content={"error": "Ollama is not ready. Wait for the Space to finish loading."},
+        )
+    except requests.HTTPError as e:
+        return JSONResponse(
+            status_code=e.response.status_code,
+            content={"error": e.response.text or str(e)},
+        )
+
+
+@app.get("/api/status")
+def api_status():
+    """Check if Ollama is up and list models."""
+    try:
+        r = requests.get(f"{OLLAMA_HOST}/api/tags", timeout=5)
+        r.raise_for_status()
+        data = r.json()
+        models = [m.get("name", "") for m in data.get("models", [])]
+        return {"status": "ok", "ollama": "up", "models": models}
+    except Exception as e:
+        return {"status": "error", "ollama": "down", "models": [], "detail": str(e)}
+
+
+# Mount Gradio at root (must be last so /api/* are matched first)
+app = gr.mount_gradio_app(app, demo, path="/")
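
For reference, a minimal sketch of how a local app might call the endpoints this commit exposes. The base URL is a placeholder for the Space's public hostname; the routes and JSON shapes follow the diff above.

import requests

BASE = "https://your-space.hf.space"  # placeholder: replace with your Space's URL

# Check whether Ollama is up and which models have been pulled
print(requests.get(f"{BASE}/api/status", timeout=10).json())

# GET variant: prompt (and optional model) as query parameters
r = requests.get(f"{BASE}/api/generate", params={"prompt": "Hello!"}, timeout=120)
print(r.json().get("response"))

# POST variant: JSON body, optionally overriding the model
r = requests.post(f"{BASE}/api/generate", json={"prompt": "Hello!", "model": "tinyllama"}, timeout=120)
print(r.json().get("response"))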
requirements.txt CHANGED
@@ -1,2 +1,4 @@
+fastapi>=0.104.0
+uvicorn[standard]>=0.24.0
 gradio>=4.0.0
 requests>=2.28.0
start.sh CHANGED
@@ -18,5 +18,5 @@ done
 # Pull default model if not present (tinyllama for free-tier CPU)
 ollama pull "${OLLAMA_MODEL:-tinyllama}" || true
 
-# Run Gradio app on port 7860 (web UI + API for local app)
-exec python app.py
+# Run FastAPI + Gradio on 7860 (UI at /, API at /api/generate)
+exec uvicorn app:app --host 0.0.0.0 --port 7860
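
Note on the launch change: app.py no longer calls demo.launch(); the module-level app object (the FastAPI instance with Gradio mounted at /) is what serves port 7860, so start.sh hands off to uvicorn via the app:app import string. A quick smoke test once the container is running, assuming port 7860 is reachable:

import requests

# Expect {"status": "ok", "ollama": "up", ...} once the model has been pulled
print(requests.get("http://127.0.0.1:7860/api/status", timeout=5).json())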