Spaces:
Build error
Build error
| # M-Wellness Reasoning API v1.0.1 | |
| import os | |
| import json | |
| from flask import Flask, request, Response, stream_with_context | |
| from huggingface_hub import hf_hub_download | |
| from llama_cpp import Llama | |
# ---------------------------------------------------------------------------
# M-Wellness Reasoning API v1.0.1 — application and model bootstrap.
# Model: DeepSeek-R1-Distill-Qwen-7B in Q4_K_M GGUF quantization (~5 GB RAM),
# chosen so a 32k-token context still fits comfortably in 16 GB of RAM.
# ---------------------------------------------------------------------------
app = Flask(__name__)

# Fetch the quantized weights from the Hub (HF_TOKEN may be unset for public repos).
print("Downloading DeepSeek-R1 Reasoning model...")
gguf_path = hf_hub_download(
    repo_id="unsloth/DeepSeek-R1-Distill-Qwen-7B-GGUF",
    filename="DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf",
    token=os.getenv("HF_TOKEN"),
)

# Load the weights once at startup; `llm` is shared by all request handlers.
print("Loading model into RAM...")
llm = Llama(
    model_path=gguf_path,
    n_ctx=32768,   # 32k context window
    n_threads=2,   # two CPU threads
    n_batch=512,
)
@app.route("/")
def home():
    """Health-check endpoint: confirms the service is up.

    Returns:
        A plain-text status string.
    """
    # NOTE(review): the original paste had no route decorator, so this view was
    # never registered with Flask; "/" is the natural path for a status page.
    return "M-Wellness Reasoning API is online."
@app.route("/v1/chat/completions", methods=["POST"])
def chat_completions():
    """OpenAI-compatible chat completions endpoint.

    Expects a JSON body with:
        messages: list of {"role": str, "content": str} dicts.
        stream:   optional bool; when true, reply as Server-Sent Events.

    Returns:
        Either a single JSON response in the OpenAI "choices" shape, or a
        text/event-stream of delta chunks terminated by "data: [DONE]".
    """
    # silent=True yields None (instead of raising) on a missing or non-JSON
    # body; fall back to an empty dict so the endpoint degrades gracefully.
    data = request.get_json(silent=True) or {}
    messages = data.get("messages", [])
    stream = data.get("stream", False)

    # Flatten the message list into a simple role-prefixed transcript.
    # NOTE(review): DeepSeek-R1's official chat template uses <think> tags and
    # dedicated role tokens; "<thought>" may not trigger the reasoning mode —
    # TODO confirm against the model card before changing the literal.
    prompt = ""
    for msg in messages:
        role = msg.get("role", "user")
        content = msg.get("content", "")
        prompt += f"\n\n{role.capitalize()}: {content}"
    prompt += "\n\nAssistant: <thought>\n"

    if stream:
        def generate():
            # One SSE "data:" line per generated chunk, then the DONE sentinel.
            for chunk in llm(prompt, max_tokens=4096, stream=True):
                text = chunk["choices"][0]["text"]
                yield f"data: {json.dumps({'choices': [{'delta': {'content': text}}]})}\n\n"
            yield "data: [DONE]\n\n"
        return Response(stream_with_context(generate()), mimetype="text/event-stream")

    output = llm(prompt, max_tokens=4096)
    # Wrap in a Response so the client sees Content-Type: application/json
    # (a bare json.dumps string would be served as text/html).
    return Response(
        json.dumps({
            "choices": [{"message": {"role": "assistant", "content": output["choices"][0]["text"]}}]
        }),
        mimetype="application/json",
    )
# Entry point: serve on all interfaces at port 7860 (the port HF Spaces probes).
if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860)