truegleai committed on
Commit
f6ba6be
·
1 Parent(s): b935497

Add Qwen Coder API

Browse files
Files changed (3) hide show
  1. Dockerfile +17 -0
  2. app.py +48 -0
  3. requirements.txt +4 -0
Dockerfile ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.10-slim

# Run as an unprivileged user (UID 1000) instead of root.
RUN useradd -m -u 1000 user
USER user
# pip falls back to per-user installs for this account; put ~/.local/bin on
# PATH so console scripts such as `uvicorn` are found by CMD.
ENV PATH="/home/user/.local/bin:$PATH"

WORKDIR /app

# Install the prebuilt CPU wheel of llama-cpp-python BEFORE requirements.txt.
# requirements.txt also lists llama-cpp-python; if it is resolved first, pip
# tries to compile it from source, and this slim image ships no C/C++ toolchain.
RUN pip install --no-cache-dir https://huggingface.co/Luigi/llama-cpp-python-wheels-hf-spaces-free-cpu/resolve/main/llama_cpp_python-0.3.22-cp310-cp310-linux_x86_64.whl

COPY --chown=user requirements.txt .
# Deliberately no --upgrade: the wheel installed above already satisfies the
# unpinned llama-cpp-python requirement, so pip must not try to replace it
# with a source build from PyPI.
RUN pip install --no-cache-dir -r requirements.txt

COPY --chown=user . .

EXPOSE 7860
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import asyncio
import json
import os
import threading

from fastapi import FastAPI, HTTPException
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
from pydantic import BaseModel
7
+
8
# ASGI application object; the route decorators further down register
# their handlers on it.
app = FastAPI()

# Hugging Face Hub repository and file name of the GGUF weights to serve.
REPO_ID = "prithivMLmods/Qwen2.5-Coder-7B-Instruct-GGUF"
FILENAME = "Qwen2.5-Coder-7B-Instruct.Q4_K_M.gguf"

# NOTE: everything below runs at import time, so the server does not start
# accepting requests until the download and the model load have finished.
print(f"Downloading {FILENAME} from {REPO_ID} ...")
model_path = hf_hub_download(filename=FILENAME, repo_id=REPO_ID)

print(f"Loading model from {model_path} ...")
llm = Llama(
    model_path=model_path,
    n_ctx=4096,
    n_threads=2,
    verbose=False,
)

print("Model ready")
18
+
19
class ChatMessage(BaseModel):
    """One entry of the conversation sent by the client."""

    # Speaker label of the message; passed through to the model unchanged.
    role: str
    # Text body of the message.
    content: str
22
+
23
class ChatRequest(BaseModel):
    """Request body accepted by the ``/v1/chat/completions`` endpoint."""

    # Model name supplied by the client; has a default so it may be omitted.
    model: str = "qwen-coder"
    # Conversation so far, in order.
    messages: list[ChatMessage]
    # Generation limit forwarded to the model as ``max_tokens``.
    max_tokens: int = 1024
    # Sampling temperature forwarded to the model.
    temperature: float = 0.7
28
+
29
# Guards the single shared model so only one generation runs at a time.
# Previously requests were serialized implicitly because the blocking call
# ran on the event loop; the lock keeps that guarantee now that the call
# runs in worker threads.
_LLM_LOCK = threading.Lock()


def _generate_completion(messages, max_tokens, temperature):
    """Run one blocking chat completion while holding the model lock."""
    with _LLM_LOCK:
        return llm.create_chat_completion(
            messages=messages,
            max_tokens=max_tokens,
            temperature=temperature,
            stop=["<|im_end|>"],
        )


@app.post("/v1/chat/completions")
async def chat_completions(request: ChatRequest):
    """Generate a chat completion for the supplied conversation.

    Args:
        request: Parsed request body holding the messages and the
            ``max_tokens`` / ``temperature`` generation settings.

    Returns:
        Whatever ``llm.create_chat_completion`` returns, unmodified.

    Raises:
        HTTPException: With status 500 and the error text as ``detail``
            when generation fails for any reason.
    """
    # Build plain dicts explicitly instead of calling ``msg.dict()``, which
    # is deprecated in Pydantic v2; the resulting payload is the same.
    messages = [
        {"role": msg.role, "content": msg.content} for msg in request.messages
    ]
    try:
        # Generation is blocking and CPU-bound. Running it in a worker thread
        # keeps the event loop free, so other routes (e.g. /health) still
        # answer while a completion is in progress.
        return await asyncio.to_thread(
            _generate_completion,
            messages,
            request.max_tokens,
            request.temperature,
        )
    except Exception as e:
        # Top-level boundary: surface the failure as HTTP 500 and keep the
        # original exception chained for server-side tracebacks.
        raise HTTPException(status_code=500, detail=str(e)) from e
41
+
42
@app.get("/")
async def root():
    """Landing route: return a fixed message saying the API is running."""
    banner = {"message": "Qwen Code API is running"}
    return banner
45
+
46
@app.get("/health")
async def health():
    """Health-check route: always answers with a constant ``ok`` status."""
    payload = {"status": "ok"}
    return payload
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ llama-cpp-python
4
+ huggingface-hub