"""FastAPI proxy exposing an OpenAI-style /v1/chat/completions endpoint backed by
Hugging Face Inference (Qwen2.5-Coder-32B-Instruct), streaming Server-Sent Events."""
from dataclasses import asdict, is_dataclass
import json
import os

from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse, StreamingResponse
from huggingface_hub import InferenceClient

app = FastAPI()

# HF_TOKEN must be set in the Space's Secrets settings.
client = InferenceClient(api_key=os.getenv("HF_TOKEN"))


@app.get("/")
def read_root():
    """Health-check endpoint; confirms the service is up."""
    return {"status": "DEV_STOREOMALL_V10_ACTIVE"}


@app.post("/v1/chat/completions")
async def chat_completions(request: Request):
    """Proxy a chat-completion request to the HF model and stream back SSE chunks.

    Expects an OpenAI-style JSON body with a required ``messages`` list and
    optional ``max_tokens`` / ``temperature``. Responds with
    ``text/event-stream`` frames (``data: <json>\\n\\n``) ending in
    ``data: [DONE]\\n\\n`` so OpenAI-compatible SSE clients can consume it.
    """
    data = await request.json()

    # Fail early with a clear 400 instead of an opaque 500 mid-stream.
    if "messages" not in data or not isinstance(data["messages"], list):
        return JSONResponse(
            status_code=400,
            content={"error": "'messages' must be a non-empty list"},
        )

    def stream_generator():
        # Call the Qwen2.5-Coder model with streaming enabled.
        # NOTE: this is a sync generator; Starlette iterates it in a
        # threadpool, so the blocking HTTP client does not stall the loop.
        stream = client.chat_completion(
            model="Qwen/Qwen2.5-Coder-32B-Instruct",
            messages=data["messages"],
            max_tokens=data.get("max_tokens", 3000),
            temperature=data.get("temperature", 0.7),
            stream=True,
        )
        for chunk in stream:
            # chat_completion(stream=True) yields ChatCompletionStreamOutput
            # dataclass instances, which json.dumps cannot serialize directly
            # — convert to a plain dict first (bug fix: the original
            # json.dumps(chunk) raised TypeError on every chunk).
            payload = asdict(chunk) if is_dataclass(chunk) else chunk
            # SSE framing compatible with OpenAI-style clients (e.g. the
            # consuming PHP code).
            yield f"data: {json.dumps(payload)}\n\n"
        yield "data: [DONE]\n\n"

    return StreamingResponse(stream_generator(), media_type="text/event-stream")