# M-Wellness Reasoning API v1.0.1
import os
import json
from flask import Flask, request, Response, stream_with_context
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
app = Flask(__name__)
# Model: DeepSeek-R1-Distill-Qwen-7B (The best balance for 16GB RAM Reasoning)
# We use the Q4_K_M quantization to fit in ~5GB RAM, leaving plenty of room for 32k context!
print("Downloading DeepSeek-R1 Reasoning model...")
model_path = hf_hub_download(
repo_id="unsloth/DeepSeek-R1-Distill-Qwen-7B-GGUF",
filename="DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf",
token=os.getenv("HF_TOKEN")
)
# Initialize with 32k context window and 2 CPU threads
print("Loading model into RAM...")
llm = Llama(
model_path=model_path,
n_ctx=32768,
n_threads=2,
n_batch=512
)
@app.route("/")
def home():
return "M-Wellness Reasoning API is online."
@app.route("/v1/chat/completions", methods=["POST"])
def chat_completions():
# Compatible with OpenAI/Claude Code/Ollama API format
data = request.json
messages = data.get("messages", [])
stream = data.get("stream", False)
# Simple prompt formatting for DeepSeek-R1
prompt = ""
for msg in messages:
role = msg.get("role", "user")
content = msg.get("content", "")
prompt += f"\n\n{role.capitalize()}: {content}"
prompt += "\n\nAssistant: <thought>\n"
if stream:
def generate():
for chunk in llm(prompt, max_tokens=4096, stream=True):
text = chunk["choices"][0]["text"]
yield f"data: {json.dumps({'choices': [{'delta': {'content': text}}]})}\n\n"
yield "data: [DONE]\n\n"
return Response(stream_with_context(generate()), mimetype="text/event-stream")
else:
output = llm(prompt, max_tokens=4096)
return json.dumps({
"choices": [{"message": {"role": "assistant", "content": output["choices"][0]["text"]}}]
})
if __name__ == "__main__":
app.run(host="0.0.0.0", port=7860)