ibrahimlasfar commited on
Commit
ae2582f
·
1 Parent(s): 815d5f0

Update chatbot with audio/image support and fix model configuration

Browse files
Files changed (8) hide show
  1. Dockerfile +2 -1
  2. README.md +18 -7
  3. api/endpoints.py +15 -25
  4. api/models.py +2 -2
  5. main.py +36 -89
  6. requirements.txt +4 -4
  7. utils/generation.py +182 -62
  8. utils/web_search.py +7 -5
Dockerfile CHANGED
@@ -3,12 +3,13 @@ FROM python:3.10-slim
3
  # Set working directory
4
  WORKDIR /app
5
 
6
- # Install chromium-driver and build dependencies
7
  RUN apt-get update && apt-get install -y \
8
  chromium-driver \
9
  git \
10
  gcc \
11
  libc-dev \
 
12
  && apt-get clean && rm -rf /var/lib/apt/lists/*
13
 
14
  # Update pip
 
3
  # Set working directory
4
  WORKDIR /app
5
 
6
+ # Install system dependencies
7
  RUN apt-get update && apt-get install -y \
8
  chromium-driver \
9
  git \
10
  gcc \
11
  libc-dev \
12
+ ffmpeg \
13
  && apt-get clean && rm -rf /var/lib/apt/lists/*
14
 
15
  # Update pip
README.md CHANGED
@@ -3,7 +3,7 @@ title: MGZON Chat
3
  emoji: "🤖"
4
  colorFrom: "blue"
5
  colorTo: "green"
6
- sdk: docker
7
  app_file: main.py
8
  pinned: false
9
  ---
@@ -38,12 +38,23 @@ It achieves the following results on the evaluation set:
38
  - Loss: nan
39
 
40
  ## Features
41
- - Real-time voice input/output with Whisper and Parler-TTS.
42
- - Image capture and analysis with CLIP.
43
- - Web search integration with Google API.
44
- - Model selection for flexible query handling.
45
- - Enhanced UI with custom icons and responsive design.
46
-
 
 
 
 
 
 
 
 
 
 
 
47
 
48
  ## Model description
49
 
 
3
  emoji: "🤖"
4
  colorFrom: "blue"
5
  colorTo: "green"
6
+ sdk: gradio
7
  app_file: main.py
8
  pinned: false
9
  ---
 
38
  - Loss: nan
39
 
40
  ## Features
41
+ - **Text Queries**: Ask anything and get detailed responses.
42
+ - **Audio Input/Output**: Record audio directly or convert text to speech.
43
+ - **Image Analysis**: Capture images from your webcam or upload them for analysis.
44
+ - **Web Search**: Enable DeepSearch for real-time web context.
45
+ - **API Support**: Use endpoints like `/api/chat`, `/api/audio-transcription`, `/api/text-to-speech`, `/api/image-analysis`.
46
+
47
+ ## Setup
48
+ 1. Add `HF_TOKEN` and `BACKUP_HF_TOKEN` as Secrets in Space settings.
49
+ 2. Add `GOOGLE_API_KEY` and `GOOGLE_CSE_ID` for web search (optional).
50
+ 3. Set `PORT=7860`, `QUEUE_SIZE=80`, `CONCURRENCY_LIMIT=20` as Variables.
51
+ 4. Ensure `requirements.txt` and `Dockerfile` are configured correctly.
52
+
53
+ ## Usage
54
+ Access the app at `/gradio` or use API endpoints. Examples:
55
+ - **Text**: "Explain AI history."
56
+ - **Audio**: Record audio for transcription.
57
+ - **Image**: Capture or upload an image for analysis.
58
 
59
  ## Model description
60
 
api/endpoints.py CHANGED
@@ -11,15 +11,15 @@ router = APIRouter()
11
 
12
  HF_TOKEN = os.getenv("HF_TOKEN")
13
  BACKUP_HF_TOKEN = os.getenv("BACKUP_HF_TOKEN")
14
- API_ENDPOINT = os.getenv("API_ENDPOINT", "https://router.huggingface.co/v1")
15
- MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-120b:cerebras")
16
 
17
  @router.get("/api/model-info")
18
  def model_info():
19
  return {
20
  "model_name": MODEL_NAME,
21
- "secondary_model": os.getenv("SECONDARY_MODEL_NAME", "openai/gpt-oss-20b:together"),
22
- "tertiary_model": os.getenv("TERTIARY_MODEL_NAME", "mistralai/Mixtral-8x7B-Instruct-v0.1"),
23
  "clip_base_model": os.getenv("CLIP_BASE_MODEL", "openai/clip-vit-base-patch32"),
24
  "clip_large_model": os.getenv("CLIP_LARGE_MODEL", "openai/clip-vit-large-patch14"),
25
  "api_base": API_ENDPOINT,
@@ -36,7 +36,7 @@ async def performance_stats():
36
 
37
  @router.post("/api/chat")
38
  async def chat_endpoint(req: QueryRequest):
39
- model_name, api_endpoint = select_model(req.message, model_choice=req.model_choice if hasattr(req, 'model_choice') else None)
40
  stream = request_generation(
41
  api_key=HF_TOKEN,
42
  api_base=api_endpoint,
@@ -47,7 +47,6 @@ async def chat_endpoint(req: QueryRequest):
47
  temperature=req.temperature,
48
  max_new_tokens=req.max_new_tokens,
49
  deep_search=req.enable_browsing,
50
- output_type="text"
51
  )
52
  response = "".join([chunk for chunk in stream if isinstance(chunk, str)])
53
  return {"response": response}
@@ -56,7 +55,7 @@ async def chat_endpoint(req: QueryRequest):
56
  async def audio_transcription_endpoint(file: UploadFile = File(...)):
57
  model_name, api_endpoint = select_model("transcribe audio", input_type="audio")
58
  audio_data = await file.read()
59
- stream = request_generation(
60
  api_key=HF_TOKEN,
61
  api_base=api_endpoint,
62
  message="Transcribe audio",
@@ -66,16 +65,14 @@ async def audio_transcription_endpoint(file: UploadFile = File(...)):
66
  max_new_tokens=128000,
67
  input_type="audio",
68
  audio_data=audio_data,
69
- output_type="text"
70
- )
71
- response = "".join([chunk for chunk in stream if isinstance(chunk, str)])
72
  return {"transcription": response}
73
 
74
  @router.post("/api/text-to-speech")
75
  async def text_to_speech_endpoint(req: dict):
76
  text = req.get("text", "")
77
  model_name, api_endpoint = select_model("text to speech", input_type="text")
78
- stream = request_generation(
79
  api_key=HF_TOKEN,
80
  api_base=api_endpoint,
81
  message=text,
@@ -84,9 +81,8 @@ async def text_to_speech_endpoint(req: dict):
84
  temperature=0.7,
85
  max_new_tokens=128000,
86
  input_type="text",
87
- output_type="speech"
88
  )
89
- audio_data = b"".join([chunk for chunk in stream if isinstance(chunk, bytes)])
90
  return StreamingResponse(io.BytesIO(audio_data), media_type="audio/wav")
91
 
92
  @router.post("/api/code")
@@ -96,7 +92,7 @@ async def code_endpoint(req: dict):
96
  code = req.get("code", "")
97
  prompt = f"Generate code for task: {task} using {framework}. Existing code: {code}"
98
  model_name, api_endpoint = select_model(prompt)
99
- stream = request_generation(
100
  api_key=HF_TOKEN,
101
  api_base=api_endpoint,
102
  message=prompt,
@@ -104,16 +100,14 @@ async def code_endpoint(req: dict):
104
  model_name=model_name,
105
  temperature=0.7,
106
  max_new_tokens=128000,
107
- output_type="text"
108
- )
109
- response = "".join([chunk for chunk in stream if isinstance(chunk, str)])
110
  return {"generated_code": response}
111
 
112
  @router.post("/api/analysis")
113
  async def analysis_endpoint(req: dict):
114
  message = req.get("text", "")
115
  model_name, api_endpoint = select_model(message)
116
- stream = request_generation(
117
  api_key=HF_TOKEN,
118
  api_base=api_endpoint,
119
  message=message,
@@ -121,16 +115,14 @@ async def analysis_endpoint(req: dict):
121
  model_name=model_name,
122
  temperature=0.7,
123
  max_new_tokens=128000,
124
- output_type="text"
125
- )
126
- response = "".join([chunk for chunk in stream if isinstance(chunk, str)])
127
  return {"analysis": response}
128
 
129
  @router.post("/api/image-analysis")
130
  async def image_analysis_endpoint(file: UploadFile = File(...)):
131
  model_name, api_endpoint = select_model("image analysis", input_type="image")
132
  image_data = await file.read()
133
- stream = request_generation(
134
  api_key=HF_TOKEN,
135
  api_base=api_endpoint,
136
  message="Analyze this image",
@@ -140,9 +132,7 @@ async def image_analysis_endpoint(file: UploadFile = File(...)):
140
  max_new_tokens=128000,
141
  input_type="image",
142
  image_data=image_data,
143
- output_type="text"
144
- )
145
- response = "".join([chunk for chunk in stream if isinstance(chunk, str)])
146
  return {"image_analysis": response}
147
 
148
  @router.get("/api/test-model")
 
11
 
12
  HF_TOKEN = os.getenv("HF_TOKEN")
13
  BACKUP_HF_TOKEN = os.getenv("BACKUP_HF_TOKEN")
14
+ API_ENDPOINT = os.getenv("API_ENDPOINT", "https://api-inference.huggingface.co")
15
+ MODEL_NAME = os.getenv("MODEL_NAME", "mistralai/Mixtral-8x7B-Instruct-v0.1")
16
 
17
  @router.get("/api/model-info")
18
  def model_info():
19
  return {
20
  "model_name": MODEL_NAME,
21
+ "secondary_model": os.getenv("SECONDARY_MODEL_NAME", "meta-llama/Meta-Llama-3-8B-Instruct"),
22
+ "tertiary_model": os.getenv("TERTIARY_MODEL_NAME", "mistralai/Mixtral-8x22B-Instruct-v0.1"),
23
  "clip_base_model": os.getenv("CLIP_BASE_MODEL", "openai/clip-vit-base-patch32"),
24
  "clip_large_model": os.getenv("CLIP_LARGE_MODEL", "openai/clip-vit-large-patch14"),
25
  "api_base": API_ENDPOINT,
 
36
 
37
  @router.post("/api/chat")
38
  async def chat_endpoint(req: QueryRequest):
39
+ model_name, api_endpoint = select_model(req.message)
40
  stream = request_generation(
41
  api_key=HF_TOKEN,
42
  api_base=api_endpoint,
 
47
  temperature=req.temperature,
48
  max_new_tokens=req.max_new_tokens,
49
  deep_search=req.enable_browsing,
 
50
  )
51
  response = "".join([chunk for chunk in stream if isinstance(chunk, str)])
52
  return {"response": response}
 
55
  async def audio_transcription_endpoint(file: UploadFile = File(...)):
56
  model_name, api_endpoint = select_model("transcribe audio", input_type="audio")
57
  audio_data = await file.read()
58
+ response = "".join([chunk for chunk in request_generation(
59
  api_key=HF_TOKEN,
60
  api_base=api_endpoint,
61
  message="Transcribe audio",
 
65
  max_new_tokens=128000,
66
  input_type="audio",
67
  audio_data=audio_data,
68
+ ) if isinstance(chunk, str)])
 
 
69
  return {"transcription": response}
70
 
71
  @router.post("/api/text-to-speech")
72
  async def text_to_speech_endpoint(req: dict):
73
  text = req.get("text", "")
74
  model_name, api_endpoint = select_model("text to speech", input_type="text")
75
+ response = request_generation(
76
  api_key=HF_TOKEN,
77
  api_base=api_endpoint,
78
  message=text,
 
81
  temperature=0.7,
82
  max_new_tokens=128000,
83
  input_type="text",
 
84
  )
85
+ audio_data = b"".join([chunk for chunk in response if isinstance(chunk, bytes)])
86
  return StreamingResponse(io.BytesIO(audio_data), media_type="audio/wav")
87
 
88
  @router.post("/api/code")
 
92
  code = req.get("code", "")
93
  prompt = f"Generate code for task: {task} using {framework}. Existing code: {code}"
94
  model_name, api_endpoint = select_model(prompt)
95
+ response = "".join([chunk for chunk in request_generation(
96
  api_key=HF_TOKEN,
97
  api_base=api_endpoint,
98
  message=prompt,
 
100
  model_name=model_name,
101
  temperature=0.7,
102
  max_new_tokens=128000,
103
+ ) if isinstance(chunk, str)])
 
 
104
  return {"generated_code": response}
105
 
106
  @router.post("/api/analysis")
107
  async def analysis_endpoint(req: dict):
108
  message = req.get("text", "")
109
  model_name, api_endpoint = select_model(message)
110
+ response = "".join([chunk for chunk in request_generation(
111
  api_key=HF_TOKEN,
112
  api_base=api_endpoint,
113
  message=message,
 
115
  model_name=model_name,
116
  temperature=0.7,
117
  max_new_tokens=128000,
118
+ ) if isinstance(chunk, str)])
 
 
119
  return {"analysis": response}
120
 
121
  @router.post("/api/image-analysis")
122
  async def image_analysis_endpoint(file: UploadFile = File(...)):
123
  model_name, api_endpoint = select_model("image analysis", input_type="image")
124
  image_data = await file.read()
125
+ response = "".join([chunk for chunk in request_generation(
126
  api_key=HF_TOKEN,
127
  api_base=api_endpoint,
128
  message="Analyze this image",
 
132
  max_new_tokens=128000,
133
  input_type="image",
134
  image_data=image_data,
135
+ ) if isinstance(chunk, str)])
 
 
136
  return {"image_analysis": response}
137
 
138
  @router.get("/api/test-model")
api/models.py CHANGED
@@ -3,8 +3,8 @@ from typing import List, Optional
3
 
4
  class QueryRequest(BaseModel):
5
  message: str
6
- system_prompt: str = "You are an expert assistant providing detailed, comprehensive, and well-structured responses. For code, include comments, examples, and complete implementations. For image-related queries, provide detailed analysis or descriptions. For general queries, provide in-depth explanations with examples and additional context where applicable. Continue generating content until the query is fully addressed, leveraging the full capacity of the model."
7
  history: Optional[List[dict]] = None
8
  temperature: float = 0.7
9
  max_new_tokens: int = 128000
10
- enable_browsing: bool = False
 
3
 
4
  class QueryRequest(BaseModel):
5
  message: str
6
+ system_prompt: str = "You are an expert assistant providing detailed, comprehensive, and well-structured responses. Support text, audio, and image inputs. Transcribe audio using Whisper, convert text to speech using Parler-TTS, and analyze images using CLIP. Respond with text or audio based on input type. Continue until the query is fully addressed."
7
  history: Optional[List[dict]] = None
8
  temperature: float = 0.7
9
  max_new_tokens: int = 128000
10
+ enable_browsing: bool = True
main.py CHANGED
@@ -29,86 +29,48 @@ if not HF_TOKEN:
29
  QUEUE_SIZE = int(os.getenv("QUEUE_SIZE", 80))
30
  CONCURRENCY_LIMIT = int(os.getenv("CONCURRENCY_LIMIT", 20))
31
 
32
- # إعداد CSS
33
  css = """
34
  .gradio-container { max-width: 1200px; margin: auto; font-family: Arial, sans-serif; }
35
- .chatbot { border: 1px solid #ccc; border-radius: 12px; padding: 20px; background-color: #f0f4f8; }
36
- .input-textbox { font-size: 18px; padding: 12px; border-radius: 8px; }
37
- .upload-button::before {
38
- content: '📸';
39
- margin-right: 10px;
40
- font-size: 24px;
41
  }
42
- .audio-input::before {
43
- content: '🎙️';
44
- margin-right: 10px;
45
- font-size: 24px;
46
- }
47
- .audio-output::before {
48
- content: '🔊';
49
- margin-right: 10px;
50
- font-size: 24px;
51
- }
52
- .send-button {
53
- background-color: #007bff;
54
- color: white;
55
- padding: 10px 20px;
56
- border-radius: 8px;
57
- cursor: pointer;
58
- font-size: 16px;
59
- transition: background-color 0.3s;
60
- }
61
- .send-button:hover {
62
- background-color: #0056b3;
63
  }
 
64
  .loading::after {
65
- content: '';
66
- display: inline-block;
67
- width: 18px;
68
- height: 18px;
69
- border: 3px solid #007bff;
70
- border-top-color: transparent;
71
- border-radius: 50%;
72
- animation: spin 1s linear infinite;
73
- margin-left: 10px;
74
- }
75
- @keyframes spin {
76
- to { transform: rotate(360deg); }
77
  }
78
- .output-container {
79
- margin-top: 20px;
80
- padding: 15px;
81
- border: 1px solid #ddd;
82
- border-radius: 10px;
83
- background-color: #fff;
84
- }
85
- .audio-output-container {
86
- display: flex;
87
- align-items: center;
88
- gap: 12px;
89
- margin-top: 15px;
90
- }
91
- .model-selector {
92
- border-radius: 8px;
93
- padding: 10px;
94
- font-size: 16px;
95
  }
96
  """
97
 
98
- # دالة لمعالجة الإدخال (نص، صوت، صور، ملفات)
99
- def process_input(message, audio_input=None, image_input=None, model_choice="openai/gpt-oss-120b:cerebras", history=None, system_prompt=None, temperature=0.7, reasoning_effort="medium", enable_browsing=True, max_new_tokens=128000, output_type="text"):
100
  input_type = "text"
101
  audio_data = None
102
  image_data = None
103
-
104
  if audio_input:
105
  input_type = "audio"
106
  audio_data = audio_input
107
- message = "Transcribe this audio and respond accordingly"
108
  elif image_input:
109
  input_type = "image"
110
  image_data = image_input
111
- message = f"Analyze this image: {message or 'Describe the image'}"
112
 
113
  response_text = ""
114
  audio_response = None
@@ -122,9 +84,7 @@ def process_input(message, audio_input=None, image_input=None, model_choice="ope
122
  max_new_tokens=max_new_tokens,
123
  input_type=input_type,
124
  audio_data=audio_data,
125
- image_data=image_data,
126
- model_choice=model_choice,
127
- output_type=output_type
128
  ):
129
  if isinstance(chunk, bytes):
130
  audio_response = io.BytesIO(chunk)
@@ -140,47 +100,34 @@ chatbot_ui = gr.ChatInterface(
140
  label="MGZon Chatbot",
141
  height=800,
142
  latex_delimiters=LATEX_DELIMS,
 
143
  ),
144
  additional_inputs_accordion=gr.Accordion("⚙️ Settings", open=True),
145
  additional_inputs=[
146
  gr.Textbox(
147
  label="System Prompt",
148
- value="You are an expert assistant providing detailed, comprehensive, and well-structured responses. Support text, audio, image, and file inputs. For audio, transcribe using Whisper and respond with text or speech. For images, analyze using CLIP and provide detailed descriptions. For general queries, use the selected model to provide in-depth answers.",
149
  lines=4
150
  ),
151
  gr.Slider(label="Temperature", minimum=0.0, maximum=1.0, step=0.1, value=0.7),
152
  gr.Radio(label="Reasoning Effort", choices=["low", "medium", "high"], value="medium"),
153
- gr.Checkbox(label="Enable DeepSearch (web browsing)", value=True),
154
  gr.Slider(label="Max New Tokens", minimum=50, maximum=128000, step=50, value=128000),
155
- gr.Dropdown(
156
- label="Model Choice",
157
- choices=[
158
- "openai/gpt-oss-120b:cerebras",
159
- "openai/gpt-oss-20b:together",
160
- "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
161
- "mistralai/Mixtral-8x7B-Instruct-v0.1",
162
- "openai/clip-vit-base-patch32",
163
- "openai/whisper-large-v3-turbo",
164
- "parler-tts/parler-tts-mini-v1"
165
- ],
166
- value="openai/gpt-oss-120b:cerebras",
167
- elem_classes="model-selector"
168
- ),
169
- gr.Audio(label="Record & Send Voice", type="numpy", streaming=True, elem_classes="audio-input"),
170
- gr.Image(label="Capture & Send Image", type="numpy", source="webcam", elem_classes="upload-button"),
171
- gr.Radio(label="Output Type", choices=["text", "speech"], value="text")
172
  ],
173
- additional_outputs=[gr.Audio(label="Voice Output", type="filepath", elem_classes="audio-output", autoplay=True)],
174
  stop_btn="Stop",
175
  examples=[
176
  ["Explain the history of AI in detail."],
177
  ["Generate a React login component with validation."],
178
- ["Describe this image: [capture image]."],
179
- ["Transcribe and respond to this audio: [record audio]."],
180
- ["Convert this text to speech: Welcome to MGZon!"],
181
  ],
182
  title="MGZon Chatbot",
183
- description="A versatile chatbot powered by multiple models for text, image, and audio queries. Supports real-time voice and image input, model selection, and web search. Licensed under Apache 2.0.",
184
  theme="gradio/soft",
185
  css=css,
186
  )
 
29
  QUEUE_SIZE = int(os.getenv("QUEUE_SIZE", 80))
30
  CONCURRENCY_LIMIT = int(os.getenv("CONCURRENCY_LIMIT", 20))
31
 
32
+ # إعداد CSS محسّن
33
  css = """
34
  .gradio-container { max-width: 1200px; margin: auto; font-family: Arial, sans-serif; }
35
+ .chatbot { border: 1px solid #ccc; border-radius: 12px; padding: 20px; background-color: #f5f5f5; }
36
+ .input-textbox { font-size: 16px; padding: 12px; border-radius: 8px; }
37
+ .upload-button, .audio-button, .camera-button {
38
+ background-color: #007bff; color: white; padding: 10px 20px; border-radius: 8px;
39
+ display: inline-flex; align-items: center; gap: 8px; font-size: 16px;
 
40
  }
41
+ .upload-button::before { content: '📷'; font-size: 20px; }
42
+ .audio-button::before { content: '🎤'; font-size: 20px; }
43
+ .camera-button::before { content: '📸'; font-size: 20px; }
44
+ .audio-output-container {
45
+ display: flex; align-items: center; gap: 12px; margin-top: 15px;
46
+ background-color: #e9ecef; padding: 10px; border-radius: 8px;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  }
48
+ .audio-output-container::before { content: '🔊'; font-size: 20px; }
49
  .loading::after {
50
+ content: ''; display: inline-block; width: 18px; height: 18px;
51
+ border: 3px solid #007bff; border-top-color: transparent;
52
+ border-radius: 50%; animation: spin 1s linear infinite; margin-left: 10px;
 
 
 
 
 
 
 
 
 
53
  }
54
+ @keyframes spin { to { transform: rotate(360deg); } }
55
+ .output-container {
56
+ margin-top: 20px; padding: 15px; border: 1px solid #ddd;
57
+ border-radius: 10px; background-color: white;
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  }
59
  """
60
 
61
+ # دالة لمعالجة الإدخال
62
+ def process_input(message, audio_input=None, image_input=None, history=None, system_prompt=None, temperature=0.7, reasoning_effort="medium", enable_browsing=True, max_new_tokens=128000):
63
  input_type = "text"
64
  audio_data = None
65
  image_data = None
 
66
  if audio_input:
67
  input_type = "audio"
68
  audio_data = audio_input
69
+ message = "Transcribe this audio"
70
  elif image_input:
71
  input_type = "image"
72
  image_data = image_input
73
+ message = f"Analyze image: {message or 'describe this image'}"
74
 
75
  response_text = ""
76
  audio_response = None
 
84
  max_new_tokens=max_new_tokens,
85
  input_type=input_type,
86
  audio_data=audio_data,
87
+ image_data=image_data
 
 
88
  ):
89
  if isinstance(chunk, bytes):
90
  audio_response = io.BytesIO(chunk)
 
100
  label="MGZon Chatbot",
101
  height=800,
102
  latex_delimiters=LATEX_DELIMS,
103
+ elem_classes="chatbot",
104
  ),
105
  additional_inputs_accordion=gr.Accordion("⚙️ Settings", open=True),
106
  additional_inputs=[
107
  gr.Textbox(
108
  label="System Prompt",
109
+ value="You are an expert assistant providing detailed, comprehensive, and well-structured responses. Support text, audio, image inputs. Transcribe audio using Whisper, convert text to speech using Parler-TTS, and analyze images using CLIP. Respond with text or audio based on input type. Continue until the query is fully addressed.",
110
  lines=4
111
  ),
112
  gr.Slider(label="Temperature", minimum=0.0, maximum=1.0, step=0.1, value=0.7),
113
  gr.Radio(label="Reasoning Effort", choices=["low", "medium", "high"], value="medium"),
114
+ gr.Checkbox(label="Enable DeepSearch", value=True),
115
  gr.Slider(label="Max New Tokens", minimum=50, maximum=128000, step=50, value=128000),
116
+ gr.Audio(label="Record Audio", source="microphone", type="numpy", elem_classes="audio-button"),
117
+ gr.Image(label="Capture Image", source="webcam", type="numpy", elem_classes="camera-button"),
118
+ gr.File(label="Upload Image/File", file_types=["image", ".pdf", ".txt"], elem_classes="upload-button"),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
  ],
120
+ additional_outputs=[gr.Audio(label="Voice Output", type="filepath", elem_classes="audio-output-container", autoplay=True)],
121
  stop_btn="Stop",
122
  examples=[
123
  ["Explain the history of AI in detail."],
124
  ["Generate a React login component with validation."],
125
+ ["Describe this image: [capture or upload image]"],
126
+ ["Transcribe this audio: [record audio]"],
127
+ ["Convert to speech: Hello, welcome to MGZon!"],
128
  ],
129
  title="MGZon Chatbot",
130
+ description="A versatile chatbot powered by Hugging Face models for text, image, and audio queries. Supports real-time audio recording, webcam image capture, and web search. Licensed under Apache 2.0.",
131
  theme="gradio/soft",
132
  css=css,
133
  )
requirements.txt CHANGED
@@ -1,11 +1,11 @@
1
  fastapi==0.115.2
2
  uvicorn==0.30.6
3
- gradio>=4.44.1
4
  openai==1.42.0
5
  httpx==0.27.0
6
  python-dotenv==1.0.1
7
  pydocstyle==6.3.0
8
- requests==2.32.5
9
  beautifulsoup4==4.12.3
10
  tenacity==8.5.0
11
  selenium==4.25.0
@@ -15,10 +15,10 @@ cachetools==5.5.0
15
  pydub==0.25.1
16
  ffmpeg-python==0.2.0
17
  numpy==1.26.4
18
- parler-tts @ git+https://github.com/huggingface/parler-tts.git@5d0aca9753ab74ded179732f5bd797f7a8c6f8ee
19
  torch==2.4.1
20
  torchaudio==2.4.1
21
- transformers==4.43.3
22
  webrtcvad==2.0.10
23
  Pillow==10.4.0
24
  urllib3==2.0.7
 
1
  fastapi==0.115.2
2
  uvicorn==0.30.6
3
+ gradio==4.48.0
4
  openai==1.42.0
5
  httpx==0.27.0
6
  python-dotenv==1.0.1
7
  pydocstyle==6.3.0
8
+ requests==2.32.3
9
  beautifulsoup4==4.12.3
10
  tenacity==8.5.0
11
  selenium==4.25.0
 
15
  pydub==0.25.1
16
  ffmpeg-python==0.2.0
17
  numpy==1.26.4
18
+ parler-tts==0.2.0
19
  torch==2.4.1
20
  torchaudio==2.4.1
21
+ transformers==4.45.1
22
  webrtcvad==2.0.10
23
  Pillow==10.4.0
24
  urllib3==2.0.7
utils/generation.py CHANGED
@@ -13,9 +13,10 @@ import pydub
13
  import io
14
  import torchaudio
15
  from PIL import Image
 
16
  from transformers import CLIPModel, CLIPProcessor, AutoProcessor
17
  from parler_tts import ParlerTTSForConditionalGeneration
18
- from utils.web_search import web_search # نقل الاستيراد خارج الدالة
19
 
20
  logger = logging.getLogger(__name__)
21
 
@@ -33,14 +34,14 @@ LATEX_DELIMS = [
33
  # إعداد العميل لـ Hugging Face Inference API
34
  HF_TOKEN = os.getenv("HF_TOKEN")
35
  BACKUP_HF_TOKEN = os.getenv("BACKUP_HF_TOKEN")
36
- API_ENDPOINT = os.getenv("API_ENDPOINT", "https://router.huggingface.co/v1")
37
- FALLBACK_API_ENDPOINT = "https://api-inference.huggingface.co/v1"
38
- MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-120b:cerebras")
39
- SECONDARY_MODEL_NAME = os.getenv("SECONDARY_MODEL_NAME", "openai/gpt-oss-20b:together")
40
- TERTIARY_MODEL_NAME = os.getenv("TERTIARY_MODEL_NAME", "mistralai/Mixtral-8x7B-Instruct-v0.1")
41
  CLIP_BASE_MODEL = os.getenv("CLIP_BASE_MODEL", "openai/clip-vit-base-patch32")
42
  CLIP_LARGE_MODEL = os.getenv("CLIP_LARGE_MODEL", "openai/clip-vit-large-patch14")
43
- ASR_MODEL = os.getenv("ASR_MODEL", "openai/whisper-large-v3-turbo")
44
  TTS_MODEL = os.getenv("TTS_MODEL", "parler-tts/parler-tts-mini-v1")
45
 
46
  def check_model_availability(model_name: str, api_base: str, api_key: str) -> tuple[bool, str]:
@@ -64,11 +65,7 @@ def check_model_availability(model_name: str, api_base: str, api_key: str) -> tu
64
  return check_model_availability(model_name, api_base, BACKUP_HF_TOKEN)
65
  return False, api_key
66
 
67
- def select_model(query: str, input_type: str = "text", model_choice: Optional[str] = None) -> tuple[str, str]:
68
- if model_choice:
69
- logger.info(f"User-selected model: {model_choice}")
70
- return model_choice, API_ENDPOINT if model_choice in [MODEL_NAME, SECONDARY_MODEL_NAME, TERTIARY_MODEL_NAME] else FALLBACK_API_ENDPOINT
71
-
72
  query_lower = query.lower()
73
  if input_type == "audio" or any(keyword in query_lower for keyword in ["voice", "audio", "speech", "صوت", "تحويل صوت"]):
74
  logger.info(f"Selected {ASR_MODEL} with endpoint {FALLBACK_API_ENDPOINT} for audio input")
@@ -104,13 +101,14 @@ def request_generation(
104
  input_type: str = "text",
105
  audio_data: Optional[bytes] = None,
106
  image_data: Optional[bytes] = None,
107
- output_type: str = "text"
108
  ) -> Generator[bytes | str, None, None]:
 
109
  is_available, selected_api_key = check_model_availability(model_name, api_base, api_key)
110
  if not is_available:
111
  yield f"Error: Model {model_name} is not available. Please check the model endpoint or token."
112
  return
113
 
 
114
  cache_key = hashlib.md5(json.dumps({
115
  "message": message,
116
  "system_prompt": system_prompt,
@@ -134,7 +132,7 @@ def request_generation(
134
  if model_name == ASR_MODEL and audio_data is not None:
135
  task_type = "audio_transcription"
136
  try:
137
- audio_file = io.BytesIO(audio_data)
138
  audio = pydub.AudioSegment.from_file(audio_file)
139
  audio = audio.set_channels(1).set_frame_rate(16000)
140
  audio_file = io.BytesIO()
@@ -146,15 +144,6 @@ def request_generation(
146
  response_format="text"
147
  )
148
  yield transcription
149
- if output_type == "speech":
150
- tts_model = TTS_MODEL
151
- tts_inputs = AutoProcessor.from_pretrained(tts_model)(text=transcription, return_tensors="pt")
152
- tts_model_instance = ParlerTTSForConditionalGeneration.from_pretrained(tts_model)
153
- audio = tts_model_instance.generate(**tts_inputs)
154
- audio_file = io.BytesIO()
155
- torchaudio.save(audio_file, audio[0], sample_rate=22050, format="wav")
156
- audio_file.seek(0)
157
- yield audio_file.read()
158
  cache[cache_key] = [transcription]
159
  return
160
  except Exception as e:
@@ -163,11 +152,11 @@ def request_generation(
163
  return
164
 
165
  # معالجة تحويل النص إلى صوت (TTS)
166
- if model_name == TTS_MODEL or output_type == "speech":
167
  task_type = "text_to_speech"
168
  try:
169
- model = ParlerTTSForConditionalGeneration.from_pretrained(TTS_MODEL)
170
- processor = AutoProcessor.from_pretrained(TTS_MODEL)
171
  inputs = processor(text=message, return_tensors="pt")
172
  audio = model.generate(**inputs)
173
  audio_file = io.BytesIO()
@@ -187,23 +176,13 @@ def request_generation(
187
  try:
188
  model = CLIPModel.from_pretrained(model_name)
189
  processor = CLIPProcessor.from_pretrained(model_name)
190
- image = Image.open(io.BytesIO(image_data)).convert("RGB")
191
  inputs = processor(text=message, images=image, return_tensors="pt", padding=True)
192
  outputs = model(**inputs)
193
  logits_per_image = outputs.logits_per_image
194
  probs = logits_per_image.softmax(dim=1)
195
- analysis = f"Image analysis result: {probs.tolist()}"
196
- yield analysis
197
- if output_type == "speech":
198
- tts_model = TTS_MODEL
199
- tts_inputs = AutoProcessor.from_pretrained(tts_model)(text=analysis, return_tensors="pt")
200
- tts_model_instance = ParlerTTSForConditionalGeneration.from_pretrained(tts_model)
201
- audio = tts_model_instance.generate(**tts_inputs)
202
- audio_file = io.BytesIO()
203
- torchaudio.save(audio_file, audio[0], sample_rate=22050, format="wav")
204
- audio_file.seek(0)
205
- yield audio_file.read()
206
- cache[cache_key] = [analysis]
207
  return
208
  except Exception as e:
209
  logger.error(f"Image analysis failed: {e}")
@@ -213,16 +192,26 @@ def request_generation(
213
  # تحسين system_prompt بناءً على نوع المهمة
214
  if model_name in [CLIP_BASE_MODEL, CLIP_LARGE_MODEL]:
215
  task_type = "image"
216
- enhanced_system_prompt = f"{system_prompt}\nYou are an expert in image analysis and description. Provide detailed descriptions, classifications, or analysis of images based on the query."
217
  elif any(keyword in message.lower() for keyword in ["code", "programming", "python", "javascript", "react", "django", "flask"]):
218
  task_type = "code"
219
- enhanced_system_prompt = f"{system_prompt}\nYou are an expert programmer. Provide accurate, well-commented code with comprehensive examples and detailed explanations."
220
  elif any(keyword in message.lower() for keyword in ["analyze", "analysis", "تحليل"]):
221
  task_type = "analysis"
222
- enhanced_system_prompt = f"{system_prompt}\nProvide detailed analysis with step-by-step reasoning, examples, and data-driven insights."
 
 
 
 
 
 
223
  else:
224
- enhanced_system_prompt = f"{system_prompt}\nFor general queries, provide comprehensive, detailed responses with examples and explanations where applicable."
 
 
 
225
 
 
226
  input_messages: List[dict] = [{"role": "system", "content": enhanced_system_prompt}]
227
  if chat_history:
228
  for msg in chat_history:
@@ -258,6 +247,8 @@ def request_generation(
258
  reasoning_started = False
259
  reasoning_closed = False
260
  saw_visible_output = False
 
 
261
  buffer = ""
262
 
263
  for chunk in stream:
@@ -285,6 +276,16 @@ def request_generation(
285
  buffer = ""
286
  continue
287
 
 
 
 
 
 
 
 
 
 
 
288
  if chunk.choices[0].finish_reason in ("stop", "tool_calls", "error", "length"):
289
  if buffer:
290
  cached_chunks.append(buffer)
@@ -297,8 +298,16 @@ def request_generation(
297
  reasoning_closed = True
298
 
299
  if not saw_visible_output:
300
- cached_chunks.append("No visible output produced.")
301
- yield "No visible output produced."
 
 
 
 
 
 
 
 
302
  if chunk.choices[0].finish_reason == "error":
303
  cached_chunks.append(f"Error: Unknown error")
304
  yield f"Error: Unknown error"
@@ -311,16 +320,6 @@ def request_generation(
311
  cached_chunks.append(buffer)
312
  yield buffer
313
 
314
- if output_type == "speech":
315
- tts_model = TTS_MODEL
316
- tts_inputs = AutoProcessor.from_pretrained(tts_model)(text=buffer, return_tensors="pt")
317
- tts_model_instance = ParlerTTSForConditionalGeneration.from_pretrained(tts_model)
318
- audio = tts_model_instance.generate(**tts_inputs)
319
- audio_file = io.BytesIO()
320
- torchaudio.save(audio_file, audio[0], sample_rate=22050, format="wav")
321
- audio_file.seek(0)
322
- yield audio_file.read()
323
-
324
  cache[cache_key] = cached_chunks
325
 
326
  except Exception as e:
@@ -343,12 +342,134 @@ def request_generation(
343
  input_type=input_type,
344
  audio_data=audio_data,
345
  image_data=image_data,
346
- output_type=output_type
347
  ):
348
  yield chunk
349
  return
350
- yield f"Error: Failed to load model {model_name}: {e}"
351
- return
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
352
 
353
  def format_final(analysis_text: str, visible_text: str) -> str:
354
  reasoning_safe = html.escape((analysis_text or "").strip())
@@ -364,12 +485,12 @@ def format_final(analysis_text: str, visible_text: str) -> str:
364
  f"{response}" if response else "No final response available."
365
  )
366
 
367
- def generate(message, history, system_prompt, temperature, reasoning_effort, enable_browsing, max_new_tokens, input_type="text", audio_data=None, image_data=None, model_choice=None, output_type="text"):
368
  if not message.strip() and not audio_data and not image_data:
369
  yield "Please enter a prompt, record audio, or capture an image."
370
  return
371
 
372
- model_name, api_endpoint = select_model(message, input_type=input_type, model_choice=model_choice)
373
  chat_history = []
374
  for h in history:
375
  if isinstance(h, dict):
@@ -398,7 +519,7 @@ def generate(message, history, system_prompt, temperature, reasoning_effort, ena
398
  "type": "function",
399
  "function": {
400
  "name": "code_generation",
401
- "description": "Generate or modify code for various frameworks",
402
  "parameters": {
403
  "type": "object",
404
  "properties": {
@@ -476,7 +597,6 @@ def generate(message, history, system_prompt, temperature, reasoning_effort, ena
476
  input_type=input_type,
477
  audio_data=audio_data,
478
  image_data=image_data,
479
- output_type=output_type
480
  )
481
 
482
  for chunk in stream:
 
13
  import io
14
  import torchaudio
15
  from PIL import Image
16
+ import numpy as np
17
  from transformers import CLIPModel, CLIPProcessor, AutoProcessor
18
  from parler_tts import ParlerTTSForConditionalGeneration
19
+ from utils.web_search import web_search # استيراد مباشر
20
 
21
  logger = logging.getLogger(__name__)
22
 
 
34
  # إعداد العميل لـ Hugging Face Inference API
35
  HF_TOKEN = os.getenv("HF_TOKEN")
36
  BACKUP_HF_TOKEN = os.getenv("BACKUP_HF_TOKEN")
37
+ API_ENDPOINT = os.getenv("API_ENDPOINT", "https://api-inference.huggingface.co")
38
+ FALLBACK_API_ENDPOINT = "https://api-inference.huggingface.co"
39
+ MODEL_NAME = os.getenv("MODEL_NAME", "mistralai/Mixtral-8x7B-Instruct-v0.1")
40
+ SECONDARY_MODEL_NAME = os.getenv("SECONDARY_MODEL_NAME", "meta-llama/Meta-Llama-3-8B-Instruct")
41
+ TERTIARY_MODEL_NAME = os.getenv("TERTIARY_MODEL_NAME", "mistralai/Mixtral-8x22B-Instruct-v0.1")
42
  CLIP_BASE_MODEL = os.getenv("CLIP_BASE_MODEL", "openai/clip-vit-base-patch32")
43
  CLIP_LARGE_MODEL = os.getenv("CLIP_LARGE_MODEL", "openai/clip-vit-large-patch14")
44
+ ASR_MODEL = os.getenv("ASR_MODEL", "openai/whisper-large-v3")
45
  TTS_MODEL = os.getenv("TTS_MODEL", "parler-tts/parler-tts-mini-v1")
46
 
47
  def check_model_availability(model_name: str, api_base: str, api_key: str) -> tuple[bool, str]:
 
65
  return check_model_availability(model_name, api_base, BACKUP_HF_TOKEN)
66
  return False, api_key
67
 
68
+ def select_model(query: str, input_type: str = "text") -> tuple[str, str]:
 
 
 
 
69
  query_lower = query.lower()
70
  if input_type == "audio" or any(keyword in query_lower for keyword in ["voice", "audio", "speech", "صوت", "تحويل صوت"]):
71
  logger.info(f"Selected {ASR_MODEL} with endpoint {FALLBACK_API_ENDPOINT} for audio input")
 
101
  input_type: str = "text",
102
  audio_data: Optional[bytes] = None,
103
  image_data: Optional[bytes] = None,
 
104
  ) -> Generator[bytes | str, None, None]:
105
+ # التحقق من توفر النموذج
106
  is_available, selected_api_key = check_model_availability(model_name, api_base, api_key)
107
  if not is_available:
108
  yield f"Error: Model {model_name} is not available. Please check the model endpoint or token."
109
  return
110
 
111
+ # إنشاء مفتاح للـ cache
112
  cache_key = hashlib.md5(json.dumps({
113
  "message": message,
114
  "system_prompt": system_prompt,
 
132
  if model_name == ASR_MODEL and audio_data is not None:
133
  task_type = "audio_transcription"
134
  try:
135
+ audio_file = io.BytesIO(audio_data if isinstance(audio_data, bytes) else audio_data.tobytes())
136
  audio = pydub.AudioSegment.from_file(audio_file)
137
  audio = audio.set_channels(1).set_frame_rate(16000)
138
  audio_file = io.BytesIO()
 
144
  response_format="text"
145
  )
146
  yield transcription
 
 
 
 
 
 
 
 
 
147
  cache[cache_key] = [transcription]
148
  return
149
  except Exception as e:
 
152
  return
153
 
154
  # معالجة تحويل النص إلى صوت (TTS)
155
+ if model_name == TTS_MODEL:
156
  task_type = "text_to_speech"
157
  try:
158
+ model = ParlerTTSForConditionalGeneration.from_pretrained(model_name)
159
+ processor = AutoProcessor.from_pretrained(model_name)
160
  inputs = processor(text=message, return_tensors="pt")
161
  audio = model.generate(**inputs)
162
  audio_file = io.BytesIO()
 
176
  try:
177
  model = CLIPModel.from_pretrained(model_name)
178
  processor = CLIPProcessor.from_pretrained(model_name)
179
+ image = Image.fromarray(np.uint8(image_data)) if isinstance(image_data, np.ndarray) else Image.open(io.BytesIO(image_data)).convert("RGB")
180
  inputs = processor(text=message, images=image, return_tensors="pt", padding=True)
181
  outputs = model(**inputs)
182
  logits_per_image = outputs.logits_per_image
183
  probs = logits_per_image.softmax(dim=1)
184
+ yield f"Image analysis result: {probs.tolist()}"
185
+ cache[cache_key] = [f"Image analysis result: {probs.tolist()}"]
 
 
 
 
 
 
 
 
 
 
186
  return
187
  except Exception as e:
188
  logger.error(f"Image analysis failed: {e}")
 
192
  # تحسين system_prompt بناءً على نوع المهمة
193
  if model_name in [CLIP_BASE_MODEL, CLIP_LARGE_MODEL]:
194
  task_type = "image"
195
+ enhanced_system_prompt = f"{system_prompt}\nYou are an expert in image analysis and description. Provide detailed descriptions, classifications, or analysis of images based on the query. Continue until the query is fully addressed."
196
  elif any(keyword in message.lower() for keyword in ["code", "programming", "python", "javascript", "react", "django", "flask"]):
197
  task_type = "code"
198
+ enhanced_system_prompt = f"{system_prompt}\nYou are an expert programmer. Provide accurate, well-commented code with comprehensive examples and detailed explanations. Support frameworks like React, Django, Flask, and others. Format code with triple backticks (```) and specify the language. Continue until the task is fully addressed."
199
  elif any(keyword in message.lower() for keyword in ["analyze", "analysis", "تحليل"]):
200
  task_type = "analysis"
201
+ enhanced_system_prompt = f"{system_prompt}\nProvide detailed analysis with step-by-step reasoning, examples, and data-driven insights. Continue until all aspects of the query are thoroughly covered."
202
+ elif any(keyword in message.lower() for keyword in ["review", "مراجعة"]):
203
+ task_type = "review"
204
+ enhanced_system_prompt = f"{system_prompt}\nReview the provided content thoroughly, identify issues, and suggest improvements with detailed explanations. Ensure the response is complete and detailed."
205
+ elif any(keyword in message.lower() for keyword in ["publish", "نشر"]):
206
+ task_type = "publish"
207
+ enhanced_system_prompt = f"{system_prompt}\nPrepare content for publishing, ensuring clarity, professionalism, and adherence to best practices. Provide a complete and detailed response."
208
  else:
209
+ enhanced_system_prompt = f"{system_prompt}\nFor general queries, provide comprehensive, detailed responses with examples and explanations where applicable. Continue generating content until the query is fully answered, leveraging the full capacity of the model."
210
+
211
+ if len(message.split()) < 5:
212
+ enhanced_system_prompt += "\nEven for short or general queries, provide a detailed, in-depth response with examples, explanations, and additional context to ensure completeness."
213
 
214
+ logger.info(f"Task type detected: {task_type}")
215
  input_messages: List[dict] = [{"role": "system", "content": enhanced_system_prompt}]
216
  if chat_history:
217
  for msg in chat_history:
 
247
  reasoning_started = False
248
  reasoning_closed = False
249
  saw_visible_output = False
250
+ last_tool_name = None
251
+ last_tool_args = None
252
  buffer = ""
253
 
254
  for chunk in stream:
 
276
  buffer = ""
277
  continue
278
 
279
+ if chunk.choices[0].delta.tool_calls and model_name in [MODEL_NAME, SECONDARY_MODEL_NAME, TERTIARY_MODEL_NAME]:
280
+ tool_call = chunk.choices[0].delta.tool_calls[0]
281
+ name = getattr(tool_call, "function", {}).get("name", None)
282
+ args = getattr(tool_call, "function", {}).get("arguments", None)
283
+ if name:
284
+ last_tool_name = name
285
+ if args:
286
+ last_tool_args = args
287
+ continue
288
+
289
  if chunk.choices[0].finish_reason in ("stop", "tool_calls", "error", "length"):
290
  if buffer:
291
  cached_chunks.append(buffer)
 
298
  reasoning_closed = True
299
 
300
  if not saw_visible_output:
301
+ msg = "I attempted to call a tool, but tools aren't executed in this environment, so no final answer was produced."
302
+ if last_tool_name:
303
+ try:
304
+ args_text = json.dumps(last_tool_args, ensure_ascii=False, default=str)
305
+ except Exception:
306
+ args_text = str(last_tool_args)
307
+ msg += f"\n\n• Tool requested: **{last_tool_name}**\n• Arguments: `{args_text}`"
308
+ cached_chunks.append(msg)
309
+ yield msg
310
+
311
  if chunk.choices[0].finish_reason == "error":
312
  cached_chunks.append(f"Error: Unknown error")
313
  yield f"Error: Unknown error"
 
320
  cached_chunks.append(buffer)
321
  yield buffer
322
 
 
 
 
 
 
 
 
 
 
 
323
  cache[cache_key] = cached_chunks
324
 
325
  except Exception as e:
 
342
  input_type=input_type,
343
  audio_data=audio_data,
344
  image_data=image_data,
 
345
  ):
346
  yield chunk
347
  return
348
+ if model_name == MODEL_NAME:
349
+ fallback_model = SECONDARY_MODEL_NAME
350
+ fallback_endpoint = FALLBACK_API_ENDPOINT
351
+ logger.info(f"Retrying with fallback model: {fallback_model} on {fallback_endpoint}")
352
+ try:
353
+ is_available, selected_api_key = check_model_availability(fallback_model, fallback_endpoint, selected_api_key)
354
+ if not is_available:
355
+ yield f"Error: Fallback model {fallback_model} is not available."
356
+ return
357
+ client = OpenAI(api_key=selected_api_key, base_url=fallback_endpoint, timeout=120.0)
358
+ stream = client.chat.completions.create(
359
+ model=fallback_model,
360
+ messages=input_messages,
361
+ temperature=temperature,
362
+ max_tokens=max_new_tokens,
363
+ stream=True,
364
+ tools=[],
365
+ tool_choice="none",
366
+ )
367
+ for chunk in stream:
368
+ if chunk.choices[0].delta.content:
369
+ content = chunk.choices[0].delta.content
370
+ if content == "<|channel|>analysis<|message|>":
371
+ if not reasoning_started:
372
+ cached_chunks.append("analysis")
373
+ yield "analysis"
374
+ reasoning_started = True
375
+ continue
376
+ if content == "<|channel|>final<|message|>":
377
+ if reasoning_started and not reasoning_closed:
378
+ cached_chunks.append("assistantfinal")
379
+ yield "assistantfinal"
380
+ reasoning_closed = True
381
+ continue
382
+
383
+ saw_visible_output = True
384
+ buffer += content
385
+
386
+ if "\n" in buffer or len(buffer) > 5000:
387
+ cached_chunks.append(buffer)
388
+ yield buffer
389
+ buffer = ""
390
+ continue
391
+
392
+ if chunk.choices[0].finish_reason in ("stop", "error", "length"):
393
+ if buffer:
394
+ cached_chunks.append(buffer)
395
+ yield buffer
396
+ buffer = ""
397
+
398
+ if reasoning_started and not reasoning_closed:
399
+ cached_chunks.append("assistantfinal")
400
+ yield "assistantfinal"
401
+ reasoning_closed = True
402
+
403
+ if not saw_visible_output:
404
+ cached_chunks.append("No visible output produced.")
405
+ yield "No visible output produced."
406
+ if chunk.choices[0].finish_reason == "error":
407
+ cached_chunks.append(f"Error: Unknown error with fallback model {fallback_model}")
408
+ yield f"Error: Unknown error with fallback model {fallback_model}"
409
+ elif chunk.choices[0].finish_reason == "length":
410
+ cached_chunks.append("Response truncated due to token limit. Please refine your query or request continuation.")
411
+ yield "Response truncated due to token limit. Please refine your query or request continuation."
412
+ break
413
+
414
+ if buffer:
415
+ cached_chunks.append(buffer)
416
+ yield buffer
417
+
418
+ cache[cache_key] = cached_chunks
419
+
420
+ except Exception as e2:
421
+ logger.exception(f"[Gateway] Streaming failed for fallback model {fallback_model}: {e2}")
422
+ try:
423
+ is_available, selected_api_key = check_model_availability(TERTIARY_MODEL_NAME, FALLBACK_API_ENDPOINT, selected_api_key)
424
+ if not is_available:
425
+ yield f"Error: Tertiary model {TERTIARY_MODEL_NAME} is not available."
426
+ return
427
+ client = OpenAI(api_key=selected_api_key, base_url=FALLBACK_API_ENDPOINT, timeout=120.0)
428
+ stream = client.chat.completions.create(
429
+ model=TERTIARY_MODEL_NAME,
430
+ messages=input_messages,
431
+ temperature=temperature,
432
+ max_tokens=max_new_tokens,
433
+ stream=True,
434
+ tools=[],
435
+ tool_choice="none",
436
+ )
437
+ for chunk in stream:
438
+ if chunk.choices[0].delta.content:
439
+ content = chunk.choices[0].delta.content
440
+ saw_visible_output = True
441
+ buffer += content
442
+ if "\n" in buffer or len(buffer) > 5000:
443
+ cached_chunks.append(buffer)
444
+ yield buffer
445
+ buffer = ""
446
+ continue
447
+ if chunk.choices[0].finish_reason in ("stop", "error", "length"):
448
+ if buffer:
449
+ cached_chunks.append(buffer)
450
+ yield buffer
451
+ buffer = ""
452
+ if not saw_visible_output:
453
+ cached_chunks.append("No visible output produced.")
454
+ yield "No visible output produced."
455
+ if chunk.choices[0].finish_reason == "error":
456
+ cached_chunks.append(f"Error: Unknown error with tertiary model {TERTIARY_MODEL_NAME}")
457
+ yield f"Error: Unknown error with tertiary model {TERTIARY_MODEL_NAME}"
458
+ elif chunk.choices[0].finish_reason == "length":
459
+ cached_chunks.append("Response truncated due to token limit. Please refine your query or request continuation.")
460
+ yield "Response truncated due to token limit. Please refine your query or request continuation."
461
+ break
462
+ if buffer:
463
+ cached_chunks.append(buffer)
464
+ yield buffer
465
+ cache[cache_key] = cached_chunks
466
+ except Exception as e3:
467
+ logger.exception(f"[Gateway] Streaming failed for tertiary model {TERTIARY_MODEL_NAME}: {e3}")
468
+ yield f"Error: Failed to load all models: Primary ({model_name}), Secondary ({fallback_model}), Tertiary ({TERTIARY_MODEL_NAME}). Please check your model configurations."
469
+ return
470
+ else:
471
+ yield f"Error: Failed to load model {model_name}: {e}"
472
+ return
473
 
474
  def format_final(analysis_text: str, visible_text: str) -> str:
475
  reasoning_safe = html.escape((analysis_text or "").strip())
 
485
  f"{response}" if response else "No final response available."
486
  )
487
 
488
+ def generate(message, history, system_prompt, temperature, reasoning_effort, enable_browsing, max_new_tokens, input_type="text", audio_data=None, image_data=None):
489
  if not message.strip() and not audio_data and not image_data:
490
  yield "Please enter a prompt, record audio, or capture an image."
491
  return
492
 
493
+ model_name, api_endpoint = select_model(message, input_type=input_type)
494
  chat_history = []
495
  for h in history:
496
  if isinstance(h, dict):
 
519
  "type": "function",
520
  "function": {
521
  "name": "code_generation",
522
+ "description": "Generate or modify code for various frameworks (React, Django, Flask, etc.)",
523
  "parameters": {
524
  "type": "object",
525
  "properties": {
 
597
  input_type=input_type,
598
  audio_data=audio_data,
599
  image_data=image_data,
 
600
  )
601
 
602
  for chunk in stream:
utils/web_search.py CHANGED
@@ -10,29 +10,31 @@ def web_search(query: str) -> str:
10
  google_api_key = os.getenv("GOOGLE_API_KEY")
11
  google_cse_id = os.getenv("GOOGLE_CSE_ID")
12
  if not google_api_key or not google_cse_id:
 
13
  return "Web search requires GOOGLE_API_KEY and GOOGLE_CSE_ID to be set."
14
  url = f"https://www.googleapis.com/customsearch/v1?key={google_api_key}&cx={google_cse_id}&q={query}"
15
- response = requests.get(url, timeout=5)
16
  response.raise_for_status()
17
  results = response.json().get("items", [])
18
  if not results:
 
19
  return "No web results found."
20
  search_results = []
21
- for i, item in enumerate(results[:3]): # قللنا العدد لتسريع البحث
22
  title = item.get("title", "")
23
  snippet = item.get("snippet", "")
24
  link = item.get("link", "")
25
  try:
26
- page_response = requests.get(link, timeout=3)
27
  page_response.raise_for_status()
28
  soup = BeautifulSoup(page_response.text, "html.parser")
29
  paragraphs = soup.find_all("p")
30
- page_content = " ".join([p.get_text() for p in paragraphs][:500])
31
  except Exception as e:
32
  logger.warning(f"Failed to fetch page content for {link}: {e}")
33
  page_content = snippet
34
  search_results.append(f"Result {i+1}:\nTitle: {title}\nLink: {link}\nContent: {page_content}\n")
35
  return "\n".join(search_results)
36
  except Exception as e:
37
- logger.exception("Web search failed")
38
  return f"Web search error: {e}"
 
10
  google_api_key = os.getenv("GOOGLE_API_KEY")
11
  google_cse_id = os.getenv("GOOGLE_CSE_ID")
12
  if not google_api_key or not google_cse_id:
13
+ logger.warning("GOOGLE_API_KEY or GOOGLE_CSE_ID not set.")
14
  return "Web search requires GOOGLE_API_KEY and GOOGLE_CSE_ID to be set."
15
  url = f"https://www.googleapis.com/customsearch/v1?key={google_api_key}&cx={google_cse_id}&q={query}"
16
+ response = requests.get(url, timeout=10)
17
  response.raise_for_status()
18
  results = response.json().get("items", [])
19
  if not results:
20
+ logger.info(f"No web results found for query: {query}")
21
  return "No web results found."
22
  search_results = []
23
+ for i, item in enumerate(results[:5]):
24
  title = item.get("title", "")
25
  snippet = item.get("snippet", "")
26
  link = item.get("link", "")
27
  try:
28
+ page_response = requests.get(link, timeout=5)
29
  page_response.raise_for_status()
30
  soup = BeautifulSoup(page_response.text, "html.parser")
31
  paragraphs = soup.find_all("p")
32
+ page_content = " ".join([p.get_text() for p in paragraphs][:1000])
33
  except Exception as e:
34
  logger.warning(f"Failed to fetch page content for {link}: {e}")
35
  page_content = snippet
36
  search_results.append(f"Result {i+1}:\nTitle: {title}\nLink: {link}\nContent: {page_content}\n")
37
  return "\n".join(search_results)
38
  except Exception as e:
39
+ logger.exception(f"Web search failed for query: {query}")
40
  return f"Web search error: {e}"