oki692 committed on
Commit
7c79f25
·
verified ·
1 Parent(s): 0e5744e

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +122 -0
app.py ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import asyncio
import json
import os
import secrets

import httpx
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
9
+
10
# Application setup: FastAPI instance plus fully permissive CORS so browser
# clients on any origin can reach the streaming endpoints.
app = FastAPI(title="Ollama Streaming API", version="1.0.0")

# CORS middleware for browser access
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Configuration: each value is overridable via an environment variable and
# falls back to the original hardcoded default, so existing deployments
# keep working unchanged.
MODEL_NAME = os.getenv("MODEL_NAME", "deepseek-r1:1.5b")
# NOTE(review): shipping a default API key in source is insecure — set
# CONNECT_KEY in the environment for any real deployment.
CONNECT_KEY = os.getenv("CONNECT_KEY", "manus-ollama-2024")
OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://localhost:11434")
25
+
26
class ChatRequest(BaseModel):
    """Request body for POST /stream."""

    # Text forwarded verbatim to Ollama's /api/generate.
    prompt: str
    # Shared secret; must match CONNECT_KEY or the request is rejected (403).
    key: str
29
+
30
class HealthResponse(BaseModel):
    """Response schema for the GET / health check."""

    # Service state string, e.g. "online".
    status: str
    # Name of the configured Ollama model.
    model: str
    # Public base URL of this deployment (SPACE_URL env or localhost default).
    endpoint: str
34
+
35
@app.get("/", response_model=HealthResponse)
async def root():
    """Basic health check: report service status, model name, and endpoint URL."""
    return HealthResponse(
        status="online",
        model=MODEL_NAME,
        # Public URL of this deployment; SPACE_URL is set by the host platform.
        endpoint=os.getenv("SPACE_URL", "http://localhost:7860"),
    )
44
+
45
@app.get("/health")
async def health():
    """Detailed health check.

    Probes the local Ollama server's /api/tags endpoint and reports whether
    it is reachable.

    Returns:
        dict: ``{"status": "healthy", ...}`` when Ollama answers HTTP 200,
        otherwise a ``{"status": "degraded", ...}`` payload with an error
        description.
    """
    try:
        async with httpx.AsyncClient(timeout=5.0) as client:
            response = await client.get(f"{OLLAMA_BASE_URL}/api/tags")
        if response.status_code == 200:
            return {"status": "healthy", "ollama": "connected", "model": MODEL_NAME}
        # Bug fix: the original fell through and implicitly returned None
        # (JSON null) whenever Ollama answered with a non-200 status.
        return {
            "status": "degraded",
            "ollama": "disconnected",
            "error": f"Ollama returned HTTP {response.status_code}",
        }
    except Exception as e:
        # Connection refused, timeout, etc. — report degraded rather than 500.
        return {"status": "degraded", "ollama": "disconnected", "error": str(e)}
55
+
56
async def generate_stream(prompt: str):
    """Yield Ollama completion tokens for *prompt* as server-sent-event strings.

    Each yielded item is a ``data: {...}\\n\\n`` SSE frame containing either a
    ``text``/``done`` pair or an ``error`` message.
    """
    request_body = {
        "model": MODEL_NAME,
        "prompt": prompt,
        "stream": True,
        "options": {
            "temperature": 0.7,
            "num_predict": 2048,
            "top_k": 40,
            "top_p": 0.9,
        },
    }

    try:
        async with httpx.AsyncClient(timeout=300.0) as http:
            async with http.stream(
                "POST",
                f"{OLLAMA_BASE_URL}/api/generate",
                json=request_body,
                timeout=300.0,
            ) as resp:
                if resp.status_code != 200:
                    yield f"data: {json.dumps({'error': 'Ollama API error'})}\n\n"
                    return

                # Ollama streams one JSON object per line; forward each token.
                async for raw in resp.aiter_lines():
                    if not raw.strip():
                        continue
                    try:
                        chunk = json.loads(raw)
                    except json.JSONDecodeError:
                        # Skip partial/garbled lines rather than aborting.
                        continue
                    if "response" in chunk:
                        yield f"data: {json.dumps({'text': chunk['response'], 'done': chunk.get('done', False)})}\n\n"
                    if chunk.get("done", False):
                        break

    except Exception as e:
        # Surface any transport failure to the client as an SSE error frame.
        yield f"data: {json.dumps({'error': str(e)})}\n\n"
95
+
96
@app.post("/stream")
async def stream_chat(request: ChatRequest):
    """Stream chat completions with key authentication.

    Args:
        request: prompt text plus the shared connect key.

    Raises:
        HTTPException: 403 when the key does not match CONNECT_KEY,
            400 when the prompt is empty or whitespace-only.

    Returns:
        StreamingResponse: text/event-stream of Ollama tokens.
    """
    # Security fix: compare the untrusted key in constant time instead of
    # with `!=`, so an attacker cannot recover it via timing differences.
    if not secrets.compare_digest(request.key, CONNECT_KEY):
        raise HTTPException(status_code=403, detail="Invalid connect key")

    if not request.prompt or len(request.prompt.strip()) == 0:
        raise HTTPException(status_code=400, detail="Prompt cannot be empty")

    return StreamingResponse(
        generate_stream(request.prompt),
        media_type="text/event-stream",
        headers={
            # Disable caching and proxy buffering so tokens reach the client
            # as soon as they are yielded.
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "X-Accel-Buffering": "no",
        },
    )
114
+
115
@app.get("/models")
async def list_models():
    """Return the (single) available model and the default model name."""
    available = [MODEL_NAME]
    return {"models": available, "default": MODEL_NAME}
119
+
120
if __name__ == "__main__":
    # Local entry point: serve on 0.0.0.0:7860, the standard HF Spaces port.
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860, log_level="info")