MaenGit commited on
Commit
3a5fe25
·
1 Parent(s): a1035b9

init commit

Browse files
Files changed (4) hide show
  1. .vscode/settings.json +5 -0
  2. Dockerfile +22 -0
  3. app.py +48 -0
  4. requirements.txt +3 -0
.vscode/settings.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "python-envs.defaultEnvManager": "ms-python.python:conda",
3
+ "python-envs.defaultPackageManager": "ms-python.python:conda",
4
+ "python-envs.pythonProjects": []
5
+ }
Dockerfile ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM ollama/ollama

# 🔴 IMPORTANT: remove ollama entrypoint so CMD runs our own shell command
# (the base image's ENTRYPOINT is the ollama binary itself).
ENTRYPOINT []

WORKDIR /app

# python3-venv/pip for the app, build tools for any wheels that compile from source.
RUN apt-get update && \
    apt-get install -y python3-venv python3-pip build-essential libffi-dev libssl-dev && \
    rm -rf /var/lib/apt/lists/*

# Isolate Python deps in a venv and put it first on PATH.
RUN python3 -m venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"

# Copy requirements first so dependency install is cached across code changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY . .

EXPOSE 8000

# Start ollama in the background, wait until it actually answers (a fixed
# `sleep 5` was racy on slow hosts), pull the model, then start the API.
# ${PORT:-8000} falls back to the EXPOSEd port when $PORT is not provided.
CMD sh -c "ollama serve & until ollama list >/dev/null 2>&1; do sleep 1; done && ollama pull qwen2.5:1.5b && uvicorn app:app --host 0.0.0.0 --port ${PORT:-8000}"
app.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from fastapi import FastAPI, Request
from fastapi.responses import StreamingResponse
import httpx
import json
import asyncio  # NOTE(review): not used in this file as shown — confirm before removing

app = FastAPI()


# Chat endpoint of the Ollama server started alongside this app (default port 11434).
OLLAMA_URL = "http://localhost:11434/api/chat"
# OLLAMA_URL = "http://localhost:11434/api/generate"
13
@app.post("/v1/chat/completions")
async def chat(req: Request):
    """Proxy an OpenAI-style chat request to the local Ollama server.

    Reads ``messages`` from the JSON request body, forwards them to Ollama
    with streaming enabled, and returns the reply text as a streaming
    plain-text response, chunk by chunk.
    """
    body = await req.json()
    # Missing/absent "messages" degrades to an empty conversation rather than a 500.
    messages = body.get("messages", [])

    payload = {
        "model": "qwen2.5:1.5b",
        "messages": messages,
        "stream": True,  # Ollama replies with newline-delimited JSON chunks
        "options": {
            "temperature": 0.8,
            "top_p": 0.9,
        },
    }

    async def event_stream():
        # timeout=None: model generation can take arbitrarily long.
        async with httpx.AsyncClient(timeout=None) as client:
            async with client.stream("POST", OLLAMA_URL, json=payload) as response:
                async for line in response.aiter_lines():
                    if not line:
                        continue
                    try:
                        chunk = json.loads(line)
                    except json.JSONDecodeError:
                        # Skip malformed/partial lines instead of aborting the stream.
                        continue
                    # Ollama reports failures as an in-stream {"error": ...} object;
                    # previously these were silently dropped, leaving an empty stream.
                    if "error" in chunk:
                        yield f"[ollama error] {chunk['error']}"
                        break
                    message = chunk.get("message")
                    if message and "content" in message:
                        yield message["content"]
                    # Final chunk carries "done": true — stop reading.
                    if chunk.get("done"):
                        break

    return StreamingResponse(event_stream(), media_type="text/plain")
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ httpx