Commit 2ee9112
Parent(s): 151b25b
Update chatbot with audio/image buttons and fixed models
Files changed:
- api/endpoints.py +22 -22
- api/models.py +1 -1
- main.py +45 -44
- utils/generation.py +38 -136
- utils/web_search.py +4 -15
api/endpoints.py CHANGED

@@ -1,22 +1,24 @@
 import os
 from fastapi import APIRouter, HTTPException, UploadFile, File
+from fastapi.responses import StreamingResponse
+import io
 from openai import OpenAI
 from api.models import QueryRequest
 from utils.generation import request_generation, select_model
-from utils.web_search import web_search
 
 router = APIRouter()
 
 HF_TOKEN = os.getenv("HF_TOKEN")
+BACKUP_HF_TOKEN = os.getenv("BACKUP_HF_TOKEN")
 API_ENDPOINT = os.getenv("API_ENDPOINT", "https://router.huggingface.co/v1")
-MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-20b:
+MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-20b:together")
 
 @router.get("/api/model-info")
 def model_info():
     return {
         "model_name": MODEL_NAME,
-        "secondary_model": os.getenv("SECONDARY_MODEL_NAME", "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"),
-        "tertiary_model": os.getenv("TERTIARY_MODEL_NAME", "
+        "secondary_model": os.getenv("SECONDARY_MODEL_NAME", "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B:featherless-ai"),
+        "tertiary_model": os.getenv("TERTIARY_MODEL_NAME", "openai/gpt-oss-120b:cerebras"),
         "clip_base_model": os.getenv("CLIP_BASE_MODEL", "openai/clip-vit-base-patch32"),
         "clip_large_model": os.getenv("CLIP_LARGE_MODEL", "openai/clip-vit-large-patch14"),
         "api_base": API_ENDPOINT,

@@ -45,16 +47,14 @@ async def chat_endpoint(req: QueryRequest):
         max_new_tokens=req.max_new_tokens,
         deep_search=req.enable_browsing,
     )
-    response = "".join(
+    response = "".join([chunk for chunk in stream if isinstance(chunk, str)])
     return {"response": response}
 
-
-# in api/endpoints.py
 @router.post("/api/audio-transcription")
 async def audio_transcription_endpoint(file: UploadFile = File(...)):
     model_name, api_endpoint = select_model("transcribe audio", input_type="audio")
     audio_data = await file.read()
-    response = "".join(
+    response = "".join([chunk for chunk in request_generation(
         api_key=HF_TOKEN,
         api_base=api_endpoint,
         message="Transcribe audio",

@@ -64,7 +64,7 @@ async def audio_transcription_endpoint(file: UploadFile = File(...)):
         max_new_tokens=128000,
         input_type="audio",
         audio_data=audio_data,
-    )))
+    ) if isinstance(chunk, str)])
     return {"transcription": response}
 
 @router.post("/api/text-to-speech")

@@ -81,7 +81,7 @@ async def text_to_speech_endpoint(req: dict):
         max_new_tokens=128000,
         input_type="text",
     )
-    audio_data = b"".join(
+    audio_data = b"".join([chunk for chunk in response if isinstance(chunk, bytes)])
     return StreamingResponse(io.BytesIO(audio_data), media_type="audio/wav")
 
 @router.post("/api/code")

@@ -91,7 +91,7 @@ async def code_endpoint(req: dict):
     code = req.get("code", "")
     prompt = f"Generate code for task: {task} using {framework}. Existing code: {code}"
     model_name, api_endpoint = select_model(prompt)
-    response = "".join(
+    response = "".join([chunk for chunk in request_generation(
         api_key=HF_TOKEN,
         api_base=api_endpoint,
         message=prompt,

@@ -99,14 +99,14 @@ async def code_endpoint(req: dict):
         model_name=model_name,
         temperature=0.7,
         max_new_tokens=128000,
-    )))
+    ) if isinstance(chunk, str)])
     return {"generated_code": response}
 
 @router.post("/api/analysis")
 async def analysis_endpoint(req: dict):
     message = req.get("text", "")
     model_name, api_endpoint = select_model(message)
-    response = "".join(
+    response = "".join([chunk for chunk in request_generation(
         api_key=HF_TOKEN,
         api_base=api_endpoint,
         message=message,

@@ -114,24 +114,24 @@ async def analysis_endpoint(req: dict):
         model_name=model_name,
         temperature=0.7,
         max_new_tokens=128000,
-    )))
+    ) if isinstance(chunk, str)])
     return {"analysis": response}
 
 @router.post("/api/image-analysis")
-async def image_analysis_endpoint(
-    model_name, api_endpoint = select_model(prompt)
-    response = "".join(list(request_generation(
+async def image_analysis_endpoint(file: UploadFile = File(...)):
+    model_name, api_endpoint = select_model("analyze image", input_type="image")
+    image_data = await file.read()
+    response = "".join([chunk for chunk in request_generation(
         api_key=HF_TOKEN,
         api_base=api_endpoint,
-        message=
+        message="Analyze this image",
         system_prompt="You are an expert in image analysis. Provide detailed descriptions or classifications based on the query.",
         model_name=model_name,
         temperature=0.7,
         max_new_tokens=128000,
+        input_type="image",
+        image_data=image_data,
+    ) if isinstance(chunk, str)])
     return {"image_analysis": response}
 
 @router.get("/api/test-model")
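For quick verification of the reworked routes, a minimal client sketch; the host and port are assumptions taken from the uvicorn call in main.py, and photo.jpg is a placeholder file:

# Client sketch for two of the routes above; host/port assumed from main.py's
# uvicorn.run(..., port=7860). Requires the `requests` package.
import requests

BASE = "http://localhost:7860"

# GET /api/model-info reports the configured model names.
print(requests.get(f"{BASE}/api/model-info").json())

# POST /api/image-analysis takes a multipart upload under the "file" field.
with open("photo.jpg", "rb") as f:
    r = requests.post(f"{BASE}/api/image-analysis", files={"file": f})
print(r.json()["image_analysis"])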
api/models.py CHANGED

@@ -3,7 +3,7 @@ from typing import List, Optional
 
 class QueryRequest(BaseModel):
     message: str
-    system_prompt: str = "You are an expert assistant providing detailed, comprehensive, and well-structured responses. Support text, audio,
+    system_prompt: str = "You are an expert assistant providing detailed, comprehensive, and well-structured responses. Support text, audio, image inputs. For audio, transcribe using Whisper. For text-to-speech, use Parler-TTS. For images, analyze using CLIP. Respond with voice output when requested. Continue until the query is fully addressed."
     history: Optional[List[dict]] = None
     temperature: float = 0.7
     max_new_tokens: int = 128000
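The lengthened default system_prompt is the only change here. A sketch showing how the model's defaults behave (the system_prompt default is abbreviated; otherwise this mirrors the file):

# Sketch of QueryRequest with its defaults; system_prompt is abbreviated here,
# the real default is the full string in the diff above.
from typing import List, Optional
from pydantic import BaseModel

class QueryRequest(BaseModel):
    message: str
    system_prompt: str = "You are an expert assistant..."  # abbreviated
    history: Optional[List[dict]] = None
    temperature: float = 0.7
    max_new_tokens: int = 128000

req = QueryRequest(message="Hello")
print(req.temperature, req.max_new_tokens)  # -> 0.7 128000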
main.py CHANGED

@@ -29,32 +29,29 @@ if not HF_TOKEN:
 QUEUE_SIZE = int(os.getenv("QUEUE_SIZE", 80))
 CONCURRENCY_LIMIT = int(os.getenv("CONCURRENCY_LIMIT", 20))
 
-# CSS setup
+# CSS setup
 css = """
 .gradio-container { max-width: 1200px; margin: auto; font-family: Arial, sans-serif; }
 .chatbot { border: 1px solid #ccc; border-radius: 12px; padding: 20px; background-color: #f5f5f5; }
 .input-textbox { font-size: 16px; padding: 12px; border-radius: 8px; }
-.upload-button, .
-    background-color: #
-    display: inline-flex; align-items: center; gap: 8px; font-size: 16px;
+.upload-button, .capture-button, .record-button {
+    background-color: #4CAF50; color: white; padding: 10px 20px; border-radius: 8px; font-size: 16px; cursor: pointer;
 }
-.upload-button::
-.
-.
-.
-    background-color: #e9ecef; padding: 10px; border-radius: 8px;
-}
-.audio-output-container::before { content: '🔊'; font-size: 20px; }
+.upload-button:hover, .capture-button:hover, .record-button:hover { background-color: #45a049; }
+.upload-button::before { content: '📷 '; font-size: 20px; }
+.capture-button::before { content: '🎥 '; font-size: 20px; }
+.record-button::before { content: '🎤 '; font-size: 20px; }
+.audio-output::before { content: '🔊 '; font-size: 20px; }
 .loading::after {
-    content: ''; display: inline-block; width: 18px; height: 18px;
-    border:
-    border-radius: 50%; animation: spin 1s linear infinite; margin-left: 10px;
+    content: ''; display: inline-block; width: 18px; height: 18px; border: 3px solid #333;
+    border-top-color: transparent; border-radius: 50%; animation: spin 1s linear infinite; margin-left: 10px;
 }
 @keyframes spin { to { transform: rotate(360deg); } }
-.output-container {
-    margin-top: 20px; padding: 15px; border: 1px solid #ddd;
+.output-container {
+    margin-top: 20px; padding: 15px; border: 1px solid #ddd; border-radius: 10px; background-color: #fff;
+}
+.audio-output-container {
+    display: flex; align-items: center; gap: 12px; margin-top: 15px;
 }
 """
 

@@ -70,7 +67,7 @@ def process_input(message, audio_input=None, image_input=None, history=None, sys
     elif image_input:
         input_type = "image"
         image_data = image_input
-        message =
+        message = "Analyze this image"
 
     response_text = ""
     audio_response = None

@@ -93,47 +90,56 @@ def process_input(message, audio_input=None, image_input=None, history=None, sys
             response_text += chunk
             yield response_text, audio_response
 
+# Function to enable audio recording
+def start_recording():
+    return gr.update(visible=True)
+
+# Function to enable image capture
+def start_image_capture():
+    return gr.update(visible=True)
+
 # Gradio interface setup
-chatbot_ui = gr.
+chatbot_ui = gr.Interface(
     fn=process_input,
-        label="
-    ),
-    additional_inputs_accordion=gr.Accordion("⚙️ Settings", open=True),
-    additional_inputs=[
+    inputs=[
+        gr.Textbox(label="Message", placeholder="Type your message or use buttons below...", elem_classes="input-textbox"),
+        gr.Audio(label="Record Audio", sources=["microphone"], type="numpy", streaming=True, visible=False, elem_classes="record-button"),
+        gr.Image(label="Capture/Upload Image", sources=["webcam", "upload"], type="numpy", visible=False, elem_classes="capture-button"),
+        gr.State(value=[]),  # History
         gr.Textbox(
             label="System Prompt",
-            value="You are an expert assistant providing detailed, comprehensive, and well-structured responses. Support text, audio, image inputs.
+            value="You are an expert assistant providing detailed, comprehensive, and well-structured responses. Support text, audio, image inputs. For audio, transcribe using Whisper. For text-to-speech, use Parler-TTS. For images, analyze using CLIP. Respond with voice output when requested. Continue until the query is fully addressed.",
            lines=4
         ),
         gr.Slider(label="Temperature", minimum=0.0, maximum=1.0, step=0.1, value=0.7),
         gr.Radio(label="Reasoning Effort", choices=["low", "medium", "high"], value="medium"),
         gr.Checkbox(label="Enable DeepSearch", value=True),
         gr.Slider(label="Max New Tokens", minimum=50, maximum=128000, step=50, value=128000),
-        gr.Audio(label="Record Audio", source="microphone", type="numpy", elem_classes="audio-button"),
-        gr.Image(label="Capture Image", source="webcam", type="numpy", elem_classes="camera-button"),
-        gr.File(label="Upload Image/File", file_types=["image", ".pdf", ".txt"], elem_classes="upload-button"),
     ],
+    outputs=[
+        gr.Markdown(label="Response", elem_classes="output-container"),
+        gr.Audio(label="Voice Output", type="filepath", elem_classes="audio-output", autoplay=True)
+    ],
+    additional_inputs=[
+        gr.Button("Record Audio", elem_classes="record-button", onclick=start_recording),
+        gr.Button("Capture/Upload Image", elem_classes="capture-button", onclick=start_image_capture),
+    ],
     examples=[
         ["Explain the history of AI in detail."],
-        ["Generate a React
-        ["
-        ["
-        ["
+        ["Generate a React component for a login form."],
+        ["Transcribe this audio: [record audio]."],
+        ["Convert this text to speech: Hello, welcome to MGZon!"],
+        ["Analyze this image: [capture/upload image]."],
     ],
     title="MGZon Chatbot",
-    description="A versatile chatbot powered by
+    description="A versatile chatbot powered by advanced AI models. Supports text, audio, and image inputs with voice responses. Licensed under Apache 2.0.",
     theme="gradio/soft",
     css=css,
 )
 
 # FastAPI setup
 app = FastAPI(title="MGZon Chatbot API")
+app.include_router(api_router)
 
 # Mount Gradio on FastAPI
 app = gr.mount_gradio_app(app, chatbot_ui, path="/gradio")

@@ -157,27 +163,22 @@ class NotFoundMiddleware(BaseHTTPMiddleware):
 
 app.add_middleware(NotFoundMiddleware)
 
-# Root endpoint
 @app.get("/", response_class=HTMLResponse)
 async def root(request: Request):
     return templates.TemplateResponse("index.html", {"request": request})
 
-# Docs endpoint
 @app.get("/docs", response_class=HTMLResponse)
 async def docs(request: Request):
     return templates.TemplateResponse("docs.html", {"request": request})
 
-# Swagger UI endpoint
 @app.get("/swagger", response_class=HTMLResponse)
 async def swagger_ui():
     return get_swagger_ui_html(openapi_url="/openapi.json", title="MGZon API Documentation")
 
-# Redirect to /gradio
 @app.get("/launch-chatbot", response_class=RedirectResponse)
 async def launch_chatbot():
     return RedirectResponse(url="/gradio", status_code=302)
 
-# Run the server
 if __name__ == "__main__":
     import uvicorn
     uvicorn.run(app, host="0.0.0.0", port=int(os.getenv("PORT", 7860)))
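The Gradio-on-FastAPI mount above follows the standard gr.mount_gradio_app pattern. A self-contained sketch of just that wiring, with echo standing in for process_input:

# Minimal sketch of mounting a Gradio Interface on FastAPI, as main.py does.
import gradio as gr
import uvicorn
from fastapi import FastAPI

def echo(message: str) -> str:
    return message  # stand-in for process_input

demo = gr.Interface(fn=echo, inputs=gr.Textbox(), outputs=gr.Textbox())
app = FastAPI(title="demo")
app = gr.mount_gradio_app(app, demo, path="/gradio")  # UI served at /gradio

if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)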
utils/generation.py CHANGED

@@ -15,11 +15,12 @@ import torchaudio
 from PIL import Image
 from transformers import CLIPModel, CLIPProcessor, AutoProcessor
 from parler_tts import ParlerTTSForConditionalGeneration
+from utils.web_search import web_search  # direct import
 
 logger = logging.getLogger(__name__)
 
 # Cache setup
-cache = TTLCache(maxsize=100, ttl=600)
+cache = TTLCache(maxsize=100, ttl=600)
 
 # Define LATEX_DELIMS
 LATEX_DELIMS = [

@@ -31,19 +32,18 @@ LATEX_DELIMS = [
 
 # Client setup for the Hugging Face Inference API
 HF_TOKEN = os.getenv("HF_TOKEN")
-BACKUP_HF_TOKEN = os.getenv("BACKUP_HF_TOKEN")
+BACKUP_HF_TOKEN = os.getenv("BACKUP_HF_TOKEN")
 API_ENDPOINT = os.getenv("API_ENDPOINT", "https://router.huggingface.co/v1")
 FALLBACK_API_ENDPOINT = "https://api-inference.huggingface.co/v1"
-MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-20b:
-SECONDARY_MODEL_NAME = os.getenv("SECONDARY_MODEL_NAME", "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B")
-TERTIARY_MODEL_NAME = os.getenv("TERTIARY_MODEL_NAME", "
+MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-20b:together")
+SECONDARY_MODEL_NAME = os.getenv("SECONDARY_MODEL_NAME", "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B:featherless-ai")
+TERTIARY_MODEL_NAME = os.getenv("TERTIARY_MODEL_NAME", "openai/gpt-oss-120b:cerebras")
 CLIP_BASE_MODEL = os.getenv("CLIP_BASE_MODEL", "openai/clip-vit-base-patch32")
 CLIP_LARGE_MODEL = os.getenv("CLIP_LARGE_MODEL", "openai/clip-vit-large-patch14")
 ASR_MODEL = os.getenv("ASR_MODEL", "openai/whisper-large-v3-turbo")
 TTS_MODEL = os.getenv("TTS_MODEL", "parler-tts/parler-tts-mini-v1")
 
 def check_model_availability(model_name: str, api_base: str, api_key: str) -> tuple[bool, str]:
-    """Check model availability via the API, with backup-token support."""
     try:
         response = requests.get(
             f"{api_base}/models/{model_name}",

@@ -66,33 +66,18 @@ def check_model_availability(model_name: str, api_base: str, api_key: str) -> tu
 
 def select_model(query: str, input_type: str = "text") -> tuple[str, str]:
     query_lower = query.lower()
-    # Audio support
     if input_type == "audio" or any(keyword in query_lower for keyword in ["voice", "audio", "speech", "صوت", "تحويل صوت"]):
         logger.info(f"Selected {ASR_MODEL} with endpoint {FALLBACK_API_ENDPOINT} for audio input")
         return ASR_MODEL, FALLBACK_API_ENDPOINT
-    # Text-to-speech support
     if any(keyword in query_lower for keyword in ["text-to-speech", "tts", "تحويل نص إلى صوت"]):
         logger.info(f"Selected {TTS_MODEL} with endpoint {FALLBACK_API_ENDPOINT} for text-to-speech")
         return TTS_MODEL, FALLBACK_API_ENDPOINT
-
-    image_patterns = [
+    if input_type == "image" or any(pattern in query_lower for pattern in [
         r"\bimage\b", r"\bpicture\b", r"\bphoto\b", r"\bvisual\b", r"\bصورة\b", r"\bتحليل\s+صورة\b",
         r"\bimage\s+analysis\b", r"\bimage\s+classification\b", r"\bimage\s+description\b"
-    ]
-
-        logger.info(f"Selected {CLIP_BASE_MODEL} with endpoint {FALLBACK_API_ENDPOINT} for image-related query: {query}")
-        return CLIP_BASE_MODEL, FALLBACK_API_ENDPOINT
-    # DeepSeek model for MGZon-related queries
-    mgzon_patterns = [
-        r"\bmgzon\b", r"\bmgzon\s+(products|services|platform|features|mission|technology|solutions|oauth)\b",
-        r"\bميزات\s+mgzon\b", r"\bخدمات\s+mgzon\b", r"\boauth\b"
-    ]
-    for pattern in mgzon_patterns:
-        if re.search(pattern, query_lower, re.IGNORECASE):
-            logger.info(f"Selected {SECONDARY_MODEL_NAME} with endpoint {FALLBACK_API_ENDPOINT} for MGZon-related query: {query}")
-            return SECONDARY_MODEL_NAME, FALLBACK_API_ENDPOINT
-    # Default model for general queries
+    ]):
+        logger.info(f"Selected {CLIP_BASE_MODEL} with endpoint {FALLBACK_API_ENDPOINT} for image-related query: {query}")
+        return CLIP_BASE_MODEL, FALLBACK_API_ENDPOINT
     logger.info(f"Selected {MODEL_NAME} with endpoint {API_ENDPOINT} for general query: {query}")
     return MODEL_NAME, API_ENDPOINT

@@ -114,15 +99,11 @@ def request_generation(
     audio_data: Optional[bytes] = None,
     image_data: Optional[bytes] = None,
 ) -> Generator[bytes | str, None, None]:
-    from utils.web_search import web_search  # deferred import
-
-    # Check model availability with backup-token support
     is_available, selected_api_key = check_model_availability(model_name, api_base, api_key)
     if not is_available:
         yield f"Error: Model {model_name} is not available. Please check the model endpoint or token."
         return
 
-    # Build a cache key
     cache_key = hashlib.md5(json.dumps({
         "message": message,
         "system_prompt": system_prompt,

@@ -142,8 +123,7 @@
     task_type = "general"
     enhanced_system_prompt = system_prompt
 
-
-    if model_name == ASR_MODEL and audio_data:
+    if model_name == ASR_MODEL and audio_data is not None:
         task_type = "audio_transcription"
         try:
             audio_file = io.BytesIO(audio_data)

@@ -165,12 +145,11 @@
             yield f"Error: Audio transcription failed: {e}"
             return
 
-    # Handle text-to-speech (TTS)
     if model_name == TTS_MODEL:
         task_type = "text_to_speech"
         try:
-            model = ParlerTTSForConditionalGeneration.from_pretrained(model_name)
-            processor = AutoProcessor.from_pretrained(model_name)
+            model = ParlerTTSForConditionalGeneration.from_pretrained(model_name, token=selected_api_key)
+            processor = AutoProcessor.from_pretrained(model_name, token=selected_api_key)
             inputs = processor(text=message, return_tensors="pt")
             audio = model.generate(**inputs)
             audio_file = io.BytesIO()

@@ -184,12 +163,11 @@
             yield f"Error: Text-to-speech failed: {e}"
             return
 
-
-    if model_name in [CLIP_BASE_MODEL, CLIP_LARGE_MODEL] and image_data:
+    if model_name in [CLIP_BASE_MODEL, CLIP_LARGE_MODEL] and image_data is not None:
         task_type = "image_analysis"
         try:
-            model = CLIPModel.from_pretrained(model_name)
-            processor = CLIPProcessor.from_pretrained(model_name)
+            model = CLIPModel.from_pretrained(model_name, token=selected_api_key)
+            processor = CLIPProcessor.from_pretrained(model_name, token=selected_api_key)
             image = Image.open(io.BytesIO(image_data)).convert("RGB")
             inputs = processor(text=message, images=image, return_tensors="pt", padding=True)
             outputs = model(**inputs)

@@ -203,28 +181,26 @@
             yield f"Error: Image analysis failed: {e}"
             return
 
-    # Enhance system_prompt based on task type
     if model_name in [CLIP_BASE_MODEL, CLIP_LARGE_MODEL]:
         task_type = "image"
-        enhanced_system_prompt = f"{system_prompt}\nYou are an expert in image analysis and description. Provide detailed descriptions, classifications, or analysis of images based on the query.
+        enhanced_system_prompt = f"{system_prompt}\nYou are an expert in image analysis and description. Provide detailed descriptions, classifications, or analysis of images based on the query."
     elif any(keyword in message.lower() for keyword in ["code", "programming", "python", "javascript", "react", "django", "flask"]):
         task_type = "code"
-        enhanced_system_prompt = f"{system_prompt}\nYou are an expert programmer. Provide accurate, well-commented code with comprehensive examples and detailed explanations.
+        enhanced_system_prompt = f"{system_prompt}\nYou are an expert programmer. Provide accurate, well-commented code with comprehensive examples and detailed explanations."
     elif any(keyword in message.lower() for keyword in ["analyze", "analysis", "تحليل"]):
         task_type = "analysis"
-        enhanced_system_prompt = f"{system_prompt}\nProvide detailed analysis with step-by-step reasoning, examples, and data-driven insights.
+        enhanced_system_prompt = f"{system_prompt}\nProvide detailed analysis with step-by-step reasoning, examples, and data-driven insights."
     elif any(keyword in message.lower() for keyword in ["review", "مراجعة"]):
         task_type = "review"
-        enhanced_system_prompt = f"{system_prompt}\nReview the provided content thoroughly, identify issues, and suggest improvements with detailed explanations.
+        enhanced_system_prompt = f"{system_prompt}\nReview the provided content thoroughly, identify issues, and suggest improvements with detailed explanations."
     elif any(keyword in message.lower() for keyword in ["publish", "نشر"]):
         task_type = "publish"
-        enhanced_system_prompt = f"{system_prompt}\nPrepare content for publishing, ensuring clarity, professionalism, and adherence to best practices.
+        enhanced_system_prompt = f"{system_prompt}\nPrepare content for publishing, ensuring clarity, professionalism, and adherence to best practices."
     else:
-        enhanced_system_prompt = f"{system_prompt}\nFor general queries, provide comprehensive, detailed responses with examples and explanations where applicable.
+        enhanced_system_prompt = f"{system_prompt}\nFor general queries, provide comprehensive, detailed responses with examples and explanations where applicable."
 
-    # If the query is short, encourage a detailed answer
     if len(message.split()) < 5:
-        enhanced_system_prompt += "\nEven for short
+        enhanced_system_prompt += "\nEven for short queries, provide a detailed, in-depth response with examples and context."
 
     logger.info(f"Task type detected: {task_type}")
     input_messages: List[dict] = [{"role": "system", "content": enhanced_system_prompt}]

@@ -313,7 +289,7 @@
                     reasoning_closed = True
 
                 if not saw_visible_output:
-                    msg = "I attempted to call a tool, but tools aren't executed in this environment
+                    msg = "I attempted to call a tool, but tools aren't executed in this environment."
                     if last_tool_name:
                         try:
                             args_text = json.dumps(last_tool_args, ensure_ascii=False, default=str)

@@ -327,8 +303,8 @@
                         cached_chunks.append(f"Error: Unknown error")
                         yield f"Error: Unknown error"
                     elif chunk.choices[0].finish_reason == "length":
-                        cached_chunks.append("Response truncated due to token limit. Please refine your query
-                        yield "Response truncated due to token limit. Please refine your query
+                        cached_chunks.append("Response truncated due to token limit. Please refine your query.")
+                        yield "Response truncated due to token limit. Please refine your query."
                     break
 
             if buffer:

@@ -360,16 +336,13 @@
             ):
                 yield chunk
             return
-
-        fallback_endpoint = FALLBACK_API_ENDPOINT
-        logger.info(f"Retrying with fallback model: {fallback_model} on {fallback_endpoint}")
+        for fallback_model in [SECONDARY_MODEL_NAME, TERTIARY_MODEL_NAME]:
+            logger.info(f"Retrying with fallback model: {fallback_model}")
             try:
-                is_available, selected_api_key = check_model_availability(fallback_model,
+                is_available, selected_api_key = check_model_availability(fallback_model, FALLBACK_API_ENDPOINT, selected_api_key)
                 if not is_available:
-
-                client = OpenAI(api_key=selected_api_key, base_url=fallback_endpoint, timeout=120.0)
+                    continue
+                client = OpenAI(api_key=selected_api_key, base_url=FALLBACK_API_ENDPOINT, timeout=120.0)
                 stream = client.chat.completions.create(
                     model=fallback_model,
                     messages=input_messages,

@@ -382,39 +355,18 @@
                 for chunk in stream:
                     if chunk.choices[0].delta.content:
                         content = chunk.choices[0].delta.content
-                        if content == "<|channel|>analysis<|message|>":
-                            if not reasoning_started:
-                                cached_chunks.append("analysis")
-                                yield "analysis"
-                                reasoning_started = True
-                            continue
-                        if content == "<|channel|>final<|message|>":
-                            if reasoning_started and not reasoning_closed:
-                                cached_chunks.append("assistantfinal")
-                                yield "assistantfinal"
-                                reasoning_closed = True
-                            continue
-
                         saw_visible_output = True
                         buffer += content
-
                         if "\n" in buffer or len(buffer) > 5000:
                             cached_chunks.append(buffer)
                             yield buffer
                             buffer = ""
                         continue
-
                     if chunk.choices[0].finish_reason in ("stop", "error", "length"):
                         if buffer:
                             cached_chunks.append(buffer)
                             yield buffer
                             buffer = ""
-
-                        if reasoning_started and not reasoning_closed:
-                            cached_chunks.append("assistantfinal")
-                            yield "assistantfinal"
-                            reasoning_closed = True
-
                         if not saw_visible_output:
                             cached_chunks.append("No visible output produced.")
                             yield "No visible output produced."

@@ -422,69 +374,19 @@
                         cached_chunks.append(f"Error: Unknown error with fallback model {fallback_model}")
                         yield f"Error: Unknown error with fallback model {fallback_model}"
                     elif chunk.choices[0].finish_reason == "length":
-                        cached_chunks.append("Response truncated due to token limit.
-                        yield "Response truncated due to token limit.
+                        cached_chunks.append("Response truncated due to token limit.")
+                        yield "Response truncated due to token limit."
                         break
-
                 if buffer:
                     cached_chunks.append(buffer)
                     yield buffer
-
                 cache[cache_key] = cached_chunks
-
+                return
             except Exception as e2:
                 logger.exception(f"[Gateway] Streaming failed for fallback model {fallback_model}: {e2}")
-
-                    yield f"Error: Tertiary model {TERTIARY_MODEL_NAME} is not available."
-                    return
-                client = OpenAI(api_key=selected_api_key, base_url=FALLBACK_API_ENDPOINT, timeout=120.0)
-                stream = client.chat.completions.create(
-                    model=TERTIARY_MODEL_NAME,
-                    messages=input_messages,
-                    temperature=temperature,
-                    max_tokens=max_new_tokens,
-                    stream=True,
-                    tools=[],
-                    tool_choice="none",
-                )
-                for chunk in stream:
-                    if chunk.choices[0].delta.content:
-                        content = chunk.choices[0].delta.content
-                        saw_visible_output = True
-                        buffer += content
-                        if "\n" in buffer or len(buffer) > 5000:
-                            cached_chunks.append(buffer)
-                            yield buffer
-                            buffer = ""
-                        continue
-                    if chunk.choices[0].finish_reason in ("stop", "error", "length"):
-                        if buffer:
-                            cached_chunks.append(buffer)
-                            yield buffer
-                            buffer = ""
-                        if not saw_visible_output:
-                            cached_chunks.append("No visible output produced.")
-                            yield "No visible output produced."
-                        if chunk.choices[0].finish_reason == "error":
-                            cached_chunks.append(f"Error: Unknown error with tertiary model {TERTIARY_MODEL_NAME}")
-                            yield f"Error: Unknown error with tertiary model {TERTIARY_MODEL_NAME}"
-                        elif chunk.choices[0].finish_reason == "length":
-                            cached_chunks.append("Response truncated due to token limit. Please refine your query or request continuation.")
-                            yield "Response truncated due to token limit. Please refine your query or request continuation."
-                        break
-                if buffer:
-                    cached_chunks.append(buffer)
-                    yield buffer
-                cache[cache_key] = cached_chunks
-            except Exception as e3:
-                logger.exception(f"[Gateway] Streaming failed for tertiary model {TERTIARY_MODEL_NAME}: {e3}")
-                yield f"Error: Failed to load all models: Primary ({model_name}), Secondary ({fallback_model}), Tertiary ({TERTIARY_MODEL_NAME}). Please check your model configurations."
-                return
-        else:
-            yield f"Error: Failed to load model {model_name}: {e}"
-            return
+                continue
+        yield f"Error: Failed to load all models: Primary ({model_name}), Secondary ({SECONDARY_MODEL_NAME}), Tertiary ({TERTIARY_MODEL_NAME})."
+        return
 
 def format_final(analysis_text: str, visible_text: str) -> str:
     reasoning_safe = html.escape((analysis_text or "").strip())

@@ -534,7 +436,7 @@ def generate(message, history, system_prompt, temperature, reasoning_effort, ena
                 "type": "function",
                 "function": {
                     "name": "code_generation",
-                    "description": "Generate or modify code for various frameworks
+                    "description": "Generate or modify code for various frameworks",
                     "parameters": {
                         "type": "object",
                         "properties": {
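The main structural change in this file is collapsing the hand-unrolled secondary/tertiary retry branches into one loop. A distilled sketch of that control flow; stream_model is a hypothetical stand-in for the streaming client.chat.completions.create call:

# Distilled fallback control flow; stream_model is a hypothetical stand-in
# for the real OpenAI streaming call inside request_generation.
from typing import Generator, Iterable

def stream_model(model: str, prompt: str) -> Generator[str, None, None]:
    # Simulates an API failure for one model so the loop advances.
    if model == "unavailable":
        raise RuntimeError("model unavailable")
    yield f"[{model}] response to: {prompt}"

def generate_with_fallback(models: Iterable[str], prompt: str) -> Generator[str, None, None]:
    for model in models:
        try:
            yield from stream_model(model, prompt)
            return  # the first model that streams successfully wins
        except Exception:
            continue  # move on to the next fallback
    yield "Error: Failed to load all models."

print(list(generate_with_fallback(["unavailable", "backup"], "hi")))

One caveat this sketch shares with the real code: if a model fails midway through streaming, the chunks already yielded are not rolled back before the next fallback is tried.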
utils/web_search.py CHANGED

@@ -10,31 +10,20 @@ def web_search(query: str) -> str:
         google_api_key = os.getenv("GOOGLE_API_KEY")
         google_cse_id = os.getenv("GOOGLE_CSE_ID")
         if not google_api_key or not google_cse_id:
-            logger.warning("GOOGLE_API_KEY or GOOGLE_CSE_ID not set.")
             return "Web search requires GOOGLE_API_KEY and GOOGLE_CSE_ID to be set."
         url = f"https://www.googleapis.com/customsearch/v1?key={google_api_key}&cx={google_cse_id}&q={query}"
-        response = requests.get(url, timeout=
+        response = requests.get(url, timeout=5)
         response.raise_for_status()
         results = response.json().get("items", [])
         if not results:
-            logger.info(f"No web results found for query: {query}")
             return "No web results found."
         search_results = []
-        for i, item in enumerate(results[:
+        for i, item in enumerate(results[:3]):  # reduced the count to speed up search
             title = item.get("title", "")
             snippet = item.get("snippet", "")
             link = item.get("link", "")
-            try:
-                page_response = requests.get(link, timeout=5)
-                page_response.raise_for_status()
-                soup = BeautifulSoup(page_response.text, "html.parser")
-                paragraphs = soup.find_all("p")
-                page_content = " ".join([p.get_text() for p in paragraphs][:1000])
-            except Exception as e:
-                logger.warning(f"Failed to fetch page content for {link}: {e}")
-                page_content = snippet
-            search_results.append(f"Result {i+1}:\nTitle: {title}\nLink: {link}\nContent: {page_content}\n")
+            search_results.append(f"Result {i+1}:\nTitle: {title}\nLink: {link}\nContent: {snippet}\n")
         return "\n".join(search_results)
     except Exception as e:
-        logger.exception(f"Web search failed
+        logger.exception(f"Web search failed: {e}")
         return f"Web search error: {e}"
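The simplified search now returns snippets only; the per-page BeautifulSoup fetch is gone. A standalone sketch of the same Custom Search call follows; passing the query via params= also URL-encodes it, which the raw f-string interpolation above does not:

# Snippet-only Google Custom Search, mirroring the new web_search.py.
# GOOGLE_API_KEY and GOOGLE_CSE_ID must be set in the environment.
import os
import requests

def web_search(query: str) -> str:
    key = os.getenv("GOOGLE_API_KEY")
    cse = os.getenv("GOOGLE_CSE_ID")
    if not key or not cse:
        return "Web search requires GOOGLE_API_KEY and GOOGLE_CSE_ID to be set."
    resp = requests.get(
        "https://www.googleapis.com/customsearch/v1",
        params={"key": key, "cx": cse, "q": query},  # params= handles URL-encoding
        timeout=5,
    )
    resp.raise_for_status()
    items = resp.json().get("items", [])[:3]  # top 3 results, as in the commit
    return "\n".join(
        f"Result {i + 1}:\nTitle: {it.get('title', '')}\nLink: {it.get('link', '')}\nContent: {it.get('snippet', '')}\n"
        for i, it in enumerate(items)
    )

if __name__ == "__main__":
    print(web_search("MGZon chatbot"))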