Spaces:

truegleai
/

qwen-coder-api

Sleeping

truegleai committed on Apr 17

Commit

f6ba6be

1 Parent(s): b935497

Add Qwen Coder API

Files changed (3) hide show

Dockerfile ADDED Viewed

+# Base image: CPython 3.10, matching the cp310 tag of the llama-cpp-python wheel below.
+FROM python:3.10-slim
+# Run everything as an unprivileged user with UID 1000 instead of root.
+RUN useradd -m -u 1000 user
+USER user
+# Console scripts from user-level pip installs (e.g. uvicorn) land in this directory.
+ENV PATH="/home/user/.local/bin:$PATH"
+WORKDIR /app
+COPY --chown=user requirements.txt .
+# Install the prebuilt CPU wheel FIRST so the `llama-cpp-python` line in
+# requirements.txt is already satisfied when the next step runs. Otherwise pip
+# resolves that line from the package index, which on this slim image (no C/C++
+# toolchain) means attempting a source build.
+RUN pip install --no-cache-dir https://huggingface.co/Luigi/llama-cpp-python-wheels-hf-spaces-free-cpu/resolve/main/llama_cpp_python-0.3.22-cp310-cp310-linux_x86_64.whl
+# Deliberately no --upgrade: it would make pip replace the wheel installed above
+# with whatever newer llama-cpp-python release the index offers.
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy the application code last so dependency layers stay cached across code edits.
+COPY --chown=user . .
+EXPOSE 7860
+# Serve the FastAPI object `app` from app.py on all interfaces, port 7860.
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

app.py ADDED Viewed

+import asyncio
+import json
+import os
+import threading
+from fastapi import FastAPI, HTTPException
+from huggingface_hub import hf_hub_download
+from llama_cpp import Llama
+from pydantic import BaseModel
+app = FastAPI()
+REPO_ID = "prithivMLmods/Qwen2.5-Coder-7B-Instruct-GGUF"
+FILENAME = "Qwen2.5-Coder-7B-Instruct.Q4_K_M.gguf"
+print(f"Downloading {FILENAME} from {REPO_ID} ...")
+model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)
+print(f"Loading model from {model_path} ...")
+llm = Llama(model_path=model_path, n_ctx=4096, n_threads=2, verbose=False)
+print("Model ready")
+# One entry of the `messages` array in a chat request body.
+# Documented with comments rather than a docstring so the generated schema is unchanged.
+class ChatMessage(BaseModel):
+    # Speaker label for this turn; any string is accepted (no validation here).
+    role: str
+    # Text of the message.
+    content: str
+# Request body accepted by POST /v1/chat/completions.
+# Documented with comments rather than a docstring so the generated schema is unchanged.
+class ChatRequest(BaseModel):
+    # Accepted in the payload but not read by the handler: one fixed model is always used.
+    model: str = "qwen-coder"
+    # Conversation so far; required, and passed to the model as a list of dicts.
+    messages: list[ChatMessage]
+    # Forwarded unchanged as `max_tokens` to llm.create_chat_completion.
+    max_tokens: int = 1024
+    # Forwarded unchanged as `temperature` to llm.create_chat_completion.
+    temperature: float = 0.7
+@app.post("/v1/chat/completions")
+async def chat_completions(request: ChatRequest):
+    try:
+        response = llm.create_chat_completion(
+            messages=[msg.dict() for msg in request.messages],
+            max_tokens=request.max_tokens,
+            temperature=request.temperature,
+            stop=["<|im_end|>"]
+        )
+        return response
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+@app.get("/")
+async def root():
+    return {"message": "Qwen Code API is running"}
+@app.get("/health")
+async def health():
+    return {"status": "ok"}

requirements.txt ADDED Viewed

+# Web framework and ASGI server; the Dockerfile starts the app with uvicorn.
+fastapi
+uvicorn
+# Unpinned here; the Dockerfile separately installs a prebuilt 0.3.22 CPU wheel by URL.
+llama-cpp-python
+# Provides hf_hub_download, which app.py uses to fetch the GGUF model file.
+huggingface-hub