ibrahimlasfar commited on
Commit
ae2582f
·
1 Parent(s): 815d5f0

Update chatbot with audio/image support and fix model configuration

Browse files
Files changed (8) hide show
  1. Dockerfile +2 -1
  2. README.md +18 -7
  3. api/endpoints.py +15 -25
  4. api/models.py +2 -2
  5. main.py +36 -89
  6. requirements.txt +4 -4
  7. utils/generation.py +182 -62
  8. utils/web_search.py +7 -5
Dockerfile CHANGED
@@ -3,12 +3,13 @@ FROM python:3.10-slim
3
  # Set working directory
4
  WORKDIR /app
5
 
6
- # Install chromium-driver and build dependencies
7
  RUN apt-get update && apt-get install -y \
8
  chromium-driver \
9
  git \
10
  gcc \
11
  libc-dev \
 
12
  && apt-get clean && rm -rf /var/lib/apt/lists/*
13
 
14
  # Update pip
 
3
  # Set working directory
4
  WORKDIR /app
5
 
6
+ # Install system dependencies
7
  RUN apt-get update && apt-get install -y \
8
  chromium-driver \
9
  git \
10
  gcc \
11
  libc-dev \
12
+ ffmpeg \
13
  && apt-get clean && rm -rf /var/lib/apt/lists/*
14
 
15
  # Update pip
README.md CHANGED
@@ -3,7 +3,7 @@ title: MGZON Chat
3
  emoji: "🤖"
4
  colorFrom: "blue"
5
  colorTo: "green"
6
- sdk: docker
7
  app_file: main.py
8
  pinned: false
9
  ---
@@ -38,12 +38,23 @@ It achieves the following results on the evaluation set:
38
  - Loss: nan
39
 
40
  ## Features
41
- - Real-time voice input/output with Whisper and Parler-TTS.
42
- - Image capture and analysis with CLIP.
43
- - Web search integration with Google API.
44
- - Model selection for flexible query handling.
45
- - Enhanced UI with custom icons and responsive design.
46
-
 
 
 
 
 
 
 
 
 
 
 
47
 
48
  ## Model description
49
 
 
3
  emoji: "🤖"
4
  colorFrom: "blue"
5
  colorTo: "green"
6
+ sdk: gradio
7
  app_file: main.py
8
  pinned: false
9
  ---
 
38
  - Loss: nan
39
 
40
  ## Features
41
+ - **Text Queries**: Ask anything and get detailed responses.
42
+ - **Audio Input/Output**: Record audio directly or convert text to speech.
43
+ - **Image Analysis**: Capture images from your webcam or upload them for analysis.
44
+ - **Web Search**: Enable DeepSearch for real-time web context.
45
+ - **API Support**: Use endpoints like `/api/chat`, `/api/audio-transcription`, `/api/text-to-speech`, `/api/image-analysis`.
46
+
47
+ ## Setup
48
+ 1. Add `HF_TOKEN` and `BACKUP_HF_TOKEN` as Secrets in Space settings.
49
+ 2. Add `GOOGLE_API_KEY` and `GOOGLE_CSE_ID` for web search (optional).
50
+ 3. Set `PORT=7860`, `QUEUE_SIZE=80`, `CONCURRENCY_LIMIT=20` as Variables.
51
+ 4. Ensure `requirements.txt` and `Dockerfile` are configured correctly.
52
+
53
+ ## Usage
54
+ Access the app at `/gradio` or use API endpoints. Examples:
55
+ - **Text**: "Explain AI history."
56
+ - **Audio**: Record audio for transcription.
57
+ - **Image**: Capture or upload an image for analysis.
58
 
59
  ## Model description
60
 
api/endpoints.py CHANGED
@@ -11,15 +11,15 @@ router = APIRouter()
11
 
12
  HF_TOKEN = os.getenv("HF_TOKEN")
13
  BACKUP_HF_TOKEN = os.getenv("BACKUP_HF_TOKEN")
14
- API_ENDPOINT = os.getenv("API_ENDPOINT", "https://router.huggingface.co/v1")
15
- MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-120b:cerebras")
16
 
17
  @router.get("/api/model-info")
18
  def model_info():
19
  return {
20
  "model_name": MODEL_NAME,
21
- "secondary_model": os.getenv("SECONDARY_MODEL_NAME", "openai/gpt-oss-20b:together"),
22
- "tertiary_model": os.getenv("TERTIARY_MODEL_NAME", "mistralai/Mixtral-8x7B-Instruct-v0.1"),
23
  "clip_base_model": os.getenv("CLIP_BASE_MODEL", "openai/clip-vit-base-patch32"),
24
  "clip_large_model": os.getenv("CLIP_LARGE_MODEL", "openai/clip-vit-large-patch14"),
25
  "api_base": API_ENDPOINT,
@@ -36,7 +36,7 @@ async def performance_stats():
36
 
37
  @router.post("/api/chat")
38
  async def chat_endpoint(req: QueryRequest):
39
- model_name, api_endpoint = select_model(req.message, model_choice=req.model_choice if hasattr(req, 'model_choice') else None)
40
  stream = request_generation(
41
  api_key=HF_TOKEN,
42
  api_base=api_endpoint,
@@ -47,7 +47,6 @@ async def chat_endpoint(req: QueryRequest):
47
  temperature=req.temperature,
48
  max_new_tokens=req.max_new_tokens,
49
  deep_search=req.enable_browsing,
50
- output_type="text"
51
  )
52
  response = "".join([chunk for chunk in stream if isinstance(chunk, str)])
53
  return {"response": response}
@@ -56,7 +55,7 @@ async def chat_endpoint(req: QueryRequest):
56
  async def audio_transcription_endpoint(file: UploadFile = File(...)):
57
  model_name, api_endpoint = select_model("transcribe audio", input_type="audio")
58
  audio_data = await file.read()
59
- stream = request_generation(
60
  api_key=HF_TOKEN,
61
  api_base=api_endpoint,
62
  message="Transcribe audio",
@@ -66,16 +65,14 @@ async def audio_transcription_endpoint(file: UploadFile = File(...)):
66
  max_new_tokens=128000,
67
  input_type="audio",
68
  audio_data=audio_data,
69
- output_type="text"
70
- )
71
- response = "".join([chunk for chunk in stream if isinstance(chunk, str)])
72
  return {"transcription": response}
73
 
74
  @router.post("/api/text-to-speech")
75
  async def text_to_speech_endpoint(req: dict):
76
  text = req.get("text", "")
77
  model_name, api_endpoint = select_model("text to speech", input_type="text")
78
- stream = request_generation(
79
  api_key=HF_TOKEN,
80
  api_base=api_endpoint,
81
  message=text,
@@ -84,9 +81,8 @@ async def text_to_speech_endpoint(req: dict):
84
  temperature=0.7,
85
  max_new_tokens=128000,
86
  input_type="text",
87
- output_type="speech"
88
  )
89
- audio_data = b"".join([chunk for chunk in stream if isinstance(chunk, bytes)])
90
  return StreamingResponse(io.BytesIO(audio_data), media_type="audio/wav")
91
 
92
  @router.post("/api/code")
@@ -96,7 +92,7 @@ async def code_endpoint(req: dict):
96
  code = req.get("code", "")
97
  prompt = f"Generate code for task: {task} using {framework}. Existing code: {code}"
98
  model_name, api_endpoint = select_model(prompt)
99
- stream = request_generation(
100
  api_key=HF_TOKEN,
101
  api_base=api_endpoint,
102
  message=prompt,
@@ -104,16 +100,14 @@ async def code_endpoint(req: dict):
104
  model_name=model_name,
105
  temperature=0.7,
106
  max_new_tokens=128000,
107
- output_type="text"
108
- )
109
- response = "".join([chunk for chunk in stream if isinstance(chunk, str)])
110
  return {"generated_code": response}
111
 
112
  @router.post("/api/analysis")
113
  async def analysis_endpoint(req: dict):
114
  message = req.get("text", "")
115
  model_name, api_endpoint = select_model(message)
116
- stream = request_generation(
117
  api_key=HF_TOKEN,
118
  api_base=api_endpoint,
119
  message=message,
@@ -121,16 +115,14 @@ async def analysis_endpoint(req: dict):
121
  model_name=model_name,
122
  temperature=0.7,
123
  max_new_tokens=128000,
124
- output_type="text"
125
- )
126
- response = "".join([chunk for chunk in stream if isinstance(chunk, str)])
127
  return {"analysis": response}
128
 
129
  @router.post("/api/image-analysis")
130
  async def image_analysis_endpoint(file: UploadFile = File(...)):
131
  model_name, api_endpoint = select_model("image analysis", input_type="image")
132
  image_data = await file.read()
133
- stream = request_generation(
134
  api_key=HF_TOKEN,
135
  api_base=api_endpoint,
136
  message="Analyze this image",
@@ -140,9 +132,7 @@ async def image_analysis_endpoint(file: UploadFile = File(...)):
140
  max_new_tokens=128000,
141
  input_type="image",
142
  image_data=image_data,
143
- output_type="text"
144
- )
145
- response = "".join([chunk for chunk in stream if isinstance(chunk, str)])
146
  return {"image_analysis": response}
147
 
148
  @router.get("/api/test-model")
 
11
 
12
  HF_TOKEN = os.getenv("HF_TOKEN")
13
  BACKUP_HF_TOKEN = os.getenv("BACKUP_HF_TOKEN")
14
+ API_ENDPOINT = os.getenv("API_ENDPOINT", "https://api-inference.huggingface.co")
15
+ MODEL_NAME = os.getenv("MODEL_NAME", "mistralai/Mixtral-8x7B-Instruct-v0.1")
16
 
17
  @router.get("/api/model-info")
18
  def model_info():
19
  return {
20
  "model_name": MODEL_NAME,
21
+ "secondary_model": os.getenv("SECONDARY_MODEL_NAME", "meta-llama/Meta-Llama-3-8B-Instruct"),
22
+ "tertiary_model": os.getenv("TERTIARY_MODEL_NAME", "mistralai/Mixtral-8x22B-Instruct-v0.1"),
23
  "clip_base_model": os.getenv("CLIP_BASE_MODEL", "openai/clip-vit-base-patch32"),
24
  "clip_large_model": os.getenv("CLIP_LARGE_MODEL", "openai/clip-vit-large-patch14"),
25
  "api_base": API_ENDPOINT,
 
36
 
37
  @router.post("/api/chat")
38
  async def chat_endpoint(req: QueryRequest):
39
+ model_name, api_endpoint = select_model(req.message)
40
  stream = request_generation(
41
  api_key=HF_TOKEN,
42
  api_base=api_endpoint,
 
47
  temperature=req.temperature,
48
  max_new_tokens=req.max_new_tokens,
49
  deep_search=req.enable_browsing,
 
50
  )
51
  response = "".join([chunk for chunk in stream if isinstance(chunk, str)])
52
  return {"response": response}
 
55
  async def audio_transcription_endpoint(file: UploadFile = File(...)):
56
  model_name, api_endpoint = select_model("transcribe audio", input_type="audio")
57
  audio_data = await file.read()
58
+ response = "".join([chunk for chunk in request_generation(
59
  api_key=HF_TOKEN,
60
  api_base=api_endpoint,
61
  message="Transcribe audio",
 
65
  max_new_tokens=128000,
66
  input_type="audio",
67
  audio_data=audio_data,
68
+ ) if isinstance(chunk, str)])
 
 
69
  return {"transcription": response}
70
 
71
  @router.post("/api/text-to-speech")
72
  async def text_to_speech_endpoint(req: dict):
73
  text = req.get("text", "")
74
  model_name, api_endpoint = select_model("text to speech", input_type="text")
75
+ response = request_generation(
76
  api_key=HF_TOKEN,
77
  api_base=api_endpoint,
78
  message=text,
 
81
  temperature=0.7,
82
  max_new_tokens=128000,
83
  input_type="text",
 
84
  )
85
+ audio_data = b"".join([chunk for chunk in response if isinstance(chunk, bytes)])
86
  return StreamingResponse(io.BytesIO(audio_data), media_type="audio/wav")
87
 
88
  @router.post("/api/code")
 
92
  code = req.get("code", "")
93
  prompt = f"Generate code for task: {task} using {framework}. Existing code: {code}"
94
  model_name, api_endpoint = select_model(prompt)
95
+ response = "".join([chunk for chunk in request_generation(
96
  api_key=HF_TOKEN,
97
  api_base=api_endpoint,
98
  message=prompt,
 
100
  model_name=model_name,
101
  temperature=0.7,
102
  max_new_tokens=128000,
103
+ ) if isinstance(chunk, str)])
 
 
104
  return {"generated_code": response}
105
 
106
  @router.post("/api/analysis")
107
  async def analysis_endpoint(req: dict):
108
  message = req.get("text", "")
109
  model_name, api_endpoint = select_model(message)
110
+ response = "".join([chunk for chunk in request_generation(
111
  api_key=HF_TOKEN,
112
  api_base=api_endpoint,
113
  message=message,
 
115
  model_name=model_name,
116
  temperature=0.7,
117
  max_new_tokens=128000,
118
+ ) if isinstance(chunk, str)])
 
 
119
  return {"analysis": response}
120
 
121
  @router.post("/api/image-analysis")
122
  async def image_analysis_endpoint(file: UploadFile = File(...)):
123
  model_name, api_endpoint = select_model("image analysis", input_type="image")
124
  image_data = await file.read()
125
+ response = "".join([chunk for chunk in request_generation(
126
  api_key=HF_TOKEN,
127
  api_base=api_endpoint,
128
  message="Analyze this image",
 
132
  max_new_tokens=128000,
133
  input_type="image",
134
  image_data=image_data,
135
+ ) if isinstance(chunk, str)])
 
 
136
  return {"image_analysis": response}
137
 
138
  @router.get("/api/test-model")
api/models.py CHANGED
@@ -3,8 +3,8 @@ from typing import List, Optional
3
 
4
  class QueryRequest(BaseModel):
5
  message: str
6
- system_prompt: str = "You are an expert assistant providing detailed, comprehensive, and well-structured responses. For code, include comments, examples, and complete implementations. For image-related queries, provide detailed analysis or descriptions. For general queries, provide in-depth explanations with examples and additional context where applicable. Continue generating content until the query is fully addressed, leveraging the full capacity of the model."
7
  history: Optional[List[dict]] = None
8
  temperature: float = 0.7
9
  max_new_tokens: int = 128000
10
- enable_browsing: bool = False
 
3
 
4
  class QueryRequest(BaseModel):
5
  message: str
6
+ system_prompt: str = "You are an expert assistant providing detailed, comprehensive, and well-structured responses. Support text, audio, and image inputs. Transcribe audio using Whisper, convert text to speech using Parler-TTS, and analyze images using CLIP. Respond with text or audio based on input type. Continue until the query is fully addressed."
7
  history: Optional[List[dict]] = None
8
  temperature: float = 0.7
9
  max_new_tokens: int = 128000
10
+ enable_browsing: bool = True
main.py CHANGED
@@ -29,86 +29,48 @@ if not HF_TOKEN:
29
  QUEUE_SIZE = int(os.getenv("QUEUE_SIZE", 80))
30
  CONCURRENCY_LIMIT = int(os.getenv("CONCURRENCY_LIMIT", 20))
31
 
32
- # إعداد CSS
33
  css = """
34
  .gradio-container { max-width: 1200px; margin: auto; font-family: Arial, sans-serif; }
35
- .chatbot { border: 1px solid #ccc; border-radius: 12px; padding: 20px; background-color: #f0f4f8; }
36
- .input-textbox { font-size: 18px; padding: 12px; border-radius: 8px; }
37
- .upload-button::before {
38
- content: '📸';
39
- margin-right: 10px;
40
- font-size: 24px;
41
  }
42
- .audio-input::before {
43
- content: '🎙️';
44
- margin-right: 10px;
45
- font-size: 24px;
46
- }
47
- .audio-output::before {
48
- content: '🔊';
49
- margin-right: 10px;
50
- font-size: 24px;
51
- }
52
- .send-button {
53
- background-color: #007bff;
54
- color: white;
55
- padding: 10px 20px;
56
- border-radius: 8px;
57
- cursor: pointer;
58
- font-size: 16px;
59
- transition: background-color 0.3s;
60
- }
61
- .send-button:hover {
62
- background-color: #0056b3;
63
  }
 
64
  .loading::after {
65
- content: '';
66
- display: inline-block;
67
- width: 18px;
68
- height: 18px;
69
- border: 3px solid #007bff;
70
- border-top-color: transparent;
71
- border-radius: 50%;
72
- animation: spin 1s linear infinite;
73
- margin-left: 10px;
74
- }
75
- @keyframes spin {
76
- to { transform: rotate(360deg); }
77
  }
78
- .output-container {
79
- margin-top: 20px;
80
- padding: 15px;
81
- border: 1px solid #ddd;
82
- border-radius: 10px;
83
- background-color: #fff;
84
- }
85
- .audio-output-container {
86
- display: flex;
87
- align-items: center;
88
- gap: 12px;
89
- margin-top: 15px;
90
- }
91
- .model-selector {
92
- border-radius: 8px;
93
- padding: 10px;
94
- font-size: 16px;
95
  }
96
  """
97
 
98
- # دالة لمعالجة الإدخال (نص، صوت، صور، ملفات)
99
- def process_input(message, audio_input=None, image_input=None, model_choice="openai/gpt-oss-120b:cerebras", history=None, system_prompt=None, temperature=0.7, reasoning_effort="medium", enable_browsing=True, max_new_tokens=128000, output_type="text"):
100
  input_type = "text"
101
  audio_data = None
102
  image_data = None
103
-
104
  if audio_input:
105
  input_type = "audio"
106
  audio_data = audio_input
107
- message = "Transcribe this audio and respond accordingly"
108
  elif image_input:
109
  input_type = "image"
110
  image_data = image_input
111
- message = f"Analyze this image: {message or 'Describe the image'}"
112
 
113
  response_text = ""
114
  audio_response = None
@@ -122,9 +84,7 @@ def process_input(message, audio_input=None, image_input=None, model_choice="ope
122
  max_new_tokens=max_new_tokens,
123
  input_type=input_type,
124
  audio_data=audio_data,
125
- image_data=image_data,
126
- model_choice=model_choice,
127
- output_type=output_type
128
  ):
129
  if isinstance(chunk, bytes):
130
  audio_response = io.BytesIO(chunk)
@@ -140,47 +100,34 @@ chatbot_ui = gr.ChatInterface(
140
  label="MGZon Chatbot",
141
  height=800,
142
  latex_delimiters=LATEX_DELIMS,
 
143
  ),
144
  additional_inputs_accordion=gr.Accordion("⚙️ Settings", open=True),
145
  additional_inputs=[
146
  gr.Textbox(
147
  label="System Prompt",
148
- value="You are an expert assistant providing detailed, comprehensive, and well-structured responses. Support text, audio, image, and file inputs. For audio, transcribe using Whisper and respond with text or speech. For images, analyze using CLIP and provide detailed descriptions. For general queries, use the selected model to provide in-depth answers.",
149
  lines=4
150
  ),
151
  gr.Slider(label="Temperature", minimum=0.0, maximum=1.0, step=0.1, value=0.7),
152
  gr.Radio(label="Reasoning Effort", choices=["low", "medium", "high"], value="medium"),
153
- gr.Checkbox(label="Enable DeepSearch (web browsing)", value=True),
154
  gr.Slider(label="Max New Tokens", minimum=50, maximum=128000, step=50, value=128000),
155
- gr.Dropdown(
156
- label="Model Choice",
157
- choices=[
158
- "openai/gpt-oss-120b:cerebras",
159
- "openai/gpt-oss-20b:together",
160
- "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
161
- "mistralai/Mixtral-8x7B-Instruct-v0.1",
162
- "openai/clip-vit-base-patch32",
163
- "openai/whisper-large-v3-turbo",
164
- "parler-tts/parler-tts-mini-v1"
165
- ],
166
- value="openai/gpt-oss-120b:cerebras",
167
- elem_classes="model-selector"
168
- ),
169
- gr.Audio(label="Record & Send Voice", type="numpy", streaming=True, elem_classes="audio-input"),
170
- gr.Image(label="Capture & Send Image", type="numpy", source="webcam", elem_classes="upload-button"),
171
- gr.Radio(label="Output Type", choices=["text", "speech"], value="text")
172
  ],
173
- additional_outputs=[gr.Audio(label="Voice Output", type="filepath", elem_classes="audio-output", autoplay=True)],
174
  stop_btn="Stop",
175
  examples=[
176
  ["Explain the history of AI in detail."],
177
  ["Generate a React login component with validation."],
178
- ["Describe this image: [capture image]."],
179
- ["Transcribe and respond to this audio: [record audio]."],
180
- ["Convert this text to speech: Welcome to MGZon!"],
181
  ],
182
  title="MGZon Chatbot",
183
- description="A versatile chatbot powered by multiple models for text, image, and audio queries. Supports real-time voice and image input, model selection, and web search. Licensed under Apache 2.0.",
184
  theme="gradio/soft",
185
  css=css,
186
  )
 
29
  QUEUE_SIZE = int(os.getenv("QUEUE_SIZE", 80))
30
  CONCURRENCY_LIMIT = int(os.getenv("CONCURRENCY_LIMIT", 20))
31
 
32
+ # إعداد CSS محسّن
33
  css = """
34
  .gradio-container { max-width: 1200px; margin: auto; font-family: Arial, sans-serif; }
35
+ .chatbot { border: 1px solid #ccc; border-radius: 12px; padding: 20px; background-color: #f5f5f5; }
36
+ .input-textbox { font-size: 16px; padding: 12px; border-radius: 8px; }
37
+ .upload-button, .audio-button, .camera-button {
38
+ background-color: #007bff; color: white; padding: 10px 20px; border-radius: 8px;
39
+ display: inline-flex; align-items: center; gap: 8px; font-size: 16px;
 
40
  }
41
+ .upload-button::before { content: '📷'; font-size: 20px; }
42
+ .audio-button::before { content: '🎤'; font-size: 20px; }
43
+ .camera-button::before { content: '📸'; font-size: 20px; }
44
+ .audio-output-container {
45
+ display: flex; align-items: center; gap: 12px; margin-top: 15px;
46
+ background-color: #e9ecef; padding: 10px; border-radius: 8px;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  }
48
+ .audio-output-container::before { content: '🔊'; font-size: 20px; }
49
  .loading::after {
50
+ content: ''; display: inline-block; width: 18px; height: 18px;
51
+ border: 3px solid #007bff; border-top-color: transparent;
52
+ border-radius: 50%; animation: spin 1s linear infinite; margin-left: 10px;
 
 
 
 
 
 
 
 
 
53
  }
54
+ @keyframes spin { to { transform: rotate(360deg); } }
55
+ .output-container {
56
+ margin-top: 20px; padding: 15px; border: 1px solid #ddd;
57
+ border-radius: 10px; background-color: white;
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  }
59
  """
60
 
61
+ # دالة لمعالجة الإدخال
62
+ def process_input(message, audio_input=None, image_input=None, history=None, system_prompt=None, temperature=0.7, reasoning_effort="medium", enable_browsing=True, max_new_tokens=128000):
63
  input_type = "text"
64
  audio_data = None
65
  image_data = None
 
66
  if audio_input:
67
  input_type = "audio"
68
  audio_data = audio_input
69
+ message = "Transcribe this audio"
70
  elif image_input:
71
  input_type = "image"
72
  image_data = image_input
73
+ message = f"Analyze image: {message or 'describe this image'}"
74
 
75
  response_text = ""
76
  audio_response = None
 
84
  max_new_tokens=max_new_tokens,
85
  input_type=input_type,
86
  audio_data=audio_data,
87
+ image_data=image_data
 
 
88
  ):
89
  if isinstance(chunk, bytes):
90
  audio_response = io.BytesIO(chunk)
 
100
  label="MGZon Chatbot",
101
  height=800,
102
  latex_delimiters=LATEX_DELIMS,
103
+ elem_classes="chatbot",
104
  ),
105
  additional_inputs_accordion=gr.Accordion("⚙️ Settings", open=True),
106
  additional_inputs=[
107
  gr.Textbox(
108
  label="System Prompt",
109
+ value="You are an expert assistant providing detailed, comprehensive, and well-structured responses. Support text, audio, image inputs. Transcribe audio using Whisper, convert text to speech using Parler-TTS, and analyze images using CLIP. Respond with text or audio based on input type. Continue until the query is fully addressed.",
110
  lines=4
111
  ),
112
  gr.Slider(label="Temperature", minimum=0.0, maximum=1.0, step=0.1, value=0.7),
113
  gr.Radio(label="Reasoning Effort", choices=["low", "medium", "high"], value="medium"),
114
+ gr.Checkbox(label="Enable DeepSearch", value=True),
115
  gr.Slider(label="Max New Tokens", minimum=50, maximum=128000, step=50, value=128000),
116
+ gr.Audio(label="Record Audio", source="microphone", type="numpy", elem_classes="audio-button"),
117
+ gr.Image(label="Capture Image", source="webcam", type="numpy", elem_classes="camera-button"),
118
+ gr.File(label="Upload Image/File", file_types=["image", ".pdf", ".txt"], elem_classes="upload-button"),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
  ],
120
+ additional_outputs=[gr.Audio(label="Voice Output", type="filepath", elem_classes="audio-output-container", autoplay=True)],
121
  stop_btn="Stop",
122
  examples=[
123
  ["Explain the history of AI in detail."],
124
  ["Generate a React login component with validation."],
125
+ ["Describe this image: [capture or upload image]"],
126
+ ["Transcribe this audio: [record audio]"],
127
+ ["Convert to speech: Hello, welcome to MGZon!"],
128
  ],
129
  title="MGZon Chatbot",
130
+ description="A versatile chatbot powered by Hugging Face models for text, image, and audio queries. Supports real-time audio recording, webcam image capture, and web search. Licensed under Apache 2.0.",
131
  theme="gradio/soft",
132
  css=css,
133
  )
requirements.txt CHANGED
@@ -1,11 +1,11 @@
1
  fastapi==0.115.2
2
  uvicorn==0.30.6
3
- gradio>=4.44.1
4
  openai==1.42.0
5
  httpx==0.27.0
6
  python-dotenv==1.0.1
7
  pydocstyle==6.3.0
8
- requests==2.32.5
9
  beautifulsoup4==4.12.3
10
  tenacity==8.5.0
11
  selenium==4.25.0
@@ -15,10 +15,10 @@ cachetools==5.5.0
15
  pydub==0.25.1
16
  ffmpeg-python==0.2.0
17
  numpy==1.26.4
18
- parler-tts @ git+https://github.com/huggingface/parler-tts.git@5d0aca9753ab74ded179732f5bd797f7a8c6f8ee
19
  torch==2.4.1
20
  torchaudio==2.4.1
21
- transformers==4.43.3
22
  webrtcvad==2.0.10
23
  Pillow==10.4.0
24
  urllib3==2.0.7
 
1
  fastapi==0.115.2
2
  uvicorn==0.30.6
3
+ gradio==4.48.0
4
  openai==1.42.0
5
  httpx==0.27.0
6
  python-dotenv==1.0.1
7
  pydocstyle==6.3.0
8
+ requests==2.32.3
9
  beautifulsoup4==4.12.3
10
  tenacity==8.5.0
11
  selenium==4.25.0
 
15
  pydub==0.25.1
16
  ffmpeg-python==0.2.0
17
  numpy==1.26.4
18
+ parler-tts==0.2.0
19
  torch==2.4.1
20
  torchaudio==2.4.1
21
+ transformers==4.45.1
22
  webrtcvad==2.0.10
23
  Pillow==10.4.0
24
  urllib3==2.0.7
utils/generation.py CHANGED
@@ -13,9 +13,10 @@ import pydub
13
  import io
14
  import torchaudio
15
  from PIL import Image
 
16
  from transformers import CLIPModel, CLIPProcessor, AutoProcessor
17
  from parler_tts import ParlerTTSForConditionalGeneration
18
- from utils.web_search import web_search # نقل الاستيراد خارج الدالة
19
 
20
  logger = logging.getLogger(__name__)
21
 
@@ -33,14 +34,14 @@ LATEX_DELIMS = [
33
  # إعداد العميل لـ Hugging Face Inference API
34
  HF_TOKEN = os.getenv("HF_TOKEN")
35
  BACKUP_HF_TOKEN = os.getenv("BACKUP_HF_TOKEN")
36
- API_ENDPOINT = os.getenv("API_ENDPOINT", "https://router.huggingface.co/v1")
37
- FALLBACK_API_ENDPOINT = "https://api-inference.huggingface.co/v1"
38
- MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-120b:cerebras")
39
- SECONDARY_MODEL_NAME = os.getenv("SECONDARY_MODEL_NAME", "openai/gpt-oss-20b:together")
40
- TERTIARY_MODEL_NAME = os.getenv("TERTIARY_MODEL_NAME", "mistralai/Mixtral-8x7B-Instruct-v0.1")
41
  CLIP_BASE_MODEL = os.getenv("CLIP_BASE_MODEL", "openai/clip-vit-base-patch32")
42
  CLIP_LARGE_MODEL = os.getenv("CLIP_LARGE_MODEL", "openai/clip-vit-large-patch14")
43
- ASR_MODEL = os.getenv("ASR_MODEL", "openai/whisper-large-v3-turbo")
44
  TTS_MODEL = os.getenv("TTS_MODEL", "parler-tts/parler-tts-mini-v1")
45
 
46
  def check_model_availability(model_name: str, api_base: str, api_key: str) -> tuple[bool, str]:
@@ -64,11 +65,7 @@ def check_model_availability(model_name: str, api_base: str, api_key: str) -> tu
64
  return check_model_availability(model_name, api_base, BACKUP_HF_TOKEN)
65
  return False, api_key
66
 
67
- def select_model(query: str, input_type: str = "text", model_choice: Optional[str] = None) -> tuple[str, str]:
68
- if model_choice:
69
- logger.info(f"User-selected model: {model_choice}")
70
- return model_choice, API_ENDPOINT if model_choice in [MODEL_NAME, SECONDARY_MODEL_NAME, TERTIARY_MODEL_NAME] else FALLBACK_API_ENDPOINT
71
-
72
  query_lower = query.lower()
73
  if input_type == "audio" or any(keyword in query_lower for keyword in ["voice", "audio", "speech", "صوت", "تحويل صوت"]):
74
  logger.info(f"Selected {ASR_MODEL} with endpoint {FALLBACK_API_ENDPOINT} for audio input")
@@ -104,13 +101,14 @@ def request_generation(
104
  input_type: str = "text",
105
  audio_data: Optional[bytes] = None,
106
  image_data: Optional[bytes] = None,
107
- output_type: str = "text"
108
  ) -> Generator[bytes | str, None, None]:
 
109
  is_available, selected_api_key = check_model_availability(model_name, api_base, api_key)
110
  if not is_available:
111
  yield f"Error: Model {model_name} is not available. Please check the model endpoint or token."
112
  return
113
 
 
114
  cache_key = hashlib.md5(json.dumps({
115
  "message": message,
116
  "system_prompt": system_prompt,
@@ -134,7 +132,7 @@ def request_generation(
134
  if model_name == ASR_MODEL and audio_data is not None:
135
  task_type = "audio_transcription"
136
  try:
137
- audio_file = io.BytesIO(audio_data)
138
  audio = pydub.AudioSegment.from_file(audio_file)
139
  audio = audio.set_channels(1).set_frame_rate(16000)
140
  audio_file = io.BytesIO()
@@ -146,15 +144,6 @@ def request_generation(
146
  response_format="text"
147
  )
148
  yield transcription
149
- if output_type == "speech":
150
- tts_model = TTS_MODEL
151
- tts_inputs = AutoProcessor.from_pretrained(tts_model)(text=transcription, return_tensors="pt")
152
- tts_model_instance = ParlerTTSForConditionalGeneration.from_pretrained(tts_model)
153
- audio = tts_model_instance.generate(**tts_inputs)
154
- audio_file = io.BytesIO()
155
- torchaudio.save(audio_file, audio[0], sample_rate=22050, format="wav")
156
- audio_file.seek(0)
157
- yield audio_file.read()
158
  cache[cache_key] = [transcription]
159
  return
160
  except Exception as e:
@@ -163,11 +152,11 @@ def request_generation(
163
  return
164
 
165
  # معالجة تحويل النص إلى صوت (TTS)
166
- if model_name == TTS_MODEL or output_type == "speech":
167
  task_type = "text_to_speech"
168
  try:
169
- model = ParlerTTSForConditionalGeneration.from_pretrained(TTS_MODEL)
170
- processor = AutoProcessor.from_pretrained(TTS_MODEL)
171
  inputs = processor(text=message, return_tensors="pt")
172
  audio = model.generate(**inputs)
173
  audio_file = io.BytesIO()
@@ -187,23 +176,13 @@ def request_generation(
187
  try:
188
  model = CLIPModel.from_pretrained(model_name)
189
  processor = CLIPProcessor.from_pretrained(model_name)
190
- image = Image.open(io.BytesIO(image_data)).convert("RGB")
191
  inputs = processor(text=message, images=image, return_tensors="pt", padding=True)
192
  outputs = model(**inputs)
193
  logits_per_image = outputs.logits_per_image
194
  probs = logits_per_image.softmax(dim=1)
195
- analysis = f"Image analysis result: {probs.tolist()}"
196
- yield analysis
197
- if output_type == "speech":
198
- tts_model = TTS_MODEL
199
- tts_inputs = AutoProcessor.from_pretrained(tts_model)(text=analysis, return_tensors="pt")
200
- tts_model_instance = ParlerTTSForConditionalGeneration.from_pretrained(tts_model)
201
- audio = tts_model_instance.generate(**tts_inputs)
202
- audio_file = io.BytesIO()
203
- torchaudio.save(audio_file, audio[0], sample_rate=22050, format="wav")
204
- audio_file.seek(0)
205
- yield audio_file.read()
206
- cache[cache_key] = [analysis]
207
  return
208
  except Exception as e:
209
  logger.error(f"Image analysis failed: {e}")
@@ -213,16 +192,26 @@ def request_generation(
213
  # تحسين system_prompt بناءً على نوع المهمة
214
  if model_name in [CLIP_BASE_MODEL, CLIP_LARGE_MODEL]:
215
  task_type = "image"
216
- enhanced_system_prompt = f"{system_prompt}\nYou are an expert in image analysis and description. Provide detailed descriptions, classifications, or analysis of images based on the query."
217
  elif any(keyword in message.lower() for keyword in ["code", "programming", "python", "javascript", "react", "django", "flask"]):
218
  task_type = "code"
219
- enhanced_system_prompt = f"{system_prompt}\nYou are an expert programmer. Provide accurate, well-commented code with comprehensive examples and detailed explanations."
220
  elif any(keyword in message.lower() for keyword in ["analyze", "analysis", "تحليل"]):
221
  task_type = "analysis"
222
- enhanced_system_prompt = f"{system_prompt}\nProvide detailed analysis with step-by-step reasoning, examples, and data-driven insights."
 
 
 
 
 
 
223
  else:
224
- enhanced_system_prompt = f"{system_prompt}\nFor general queries, provide comprehensive, detailed responses with examples and explanations where applicable."
 
 
 
225
 
 
226
  input_messages: List[dict] = [{"role": "system", "content": enhanced_system_prompt}]
227
  if chat_history:
228
  for msg in chat_history:
@@ -258,6 +247,8 @@ def request_generation(
258
  reasoning_started = False
259
  reasoning_closed = False
260
  saw_visible_output = False
 
 
261
  buffer = ""
262
 
263
  for chunk in stream:
@@ -285,6 +276,16 @@ def request_generation(
285
  buffer = ""
286
  continue
287
 
 
 
 
 
 
 
 
 
 
 
288
  if chunk.choices[0].finish_reason in ("stop", "tool_calls", "error", "length"):
289
  if buffer:
290
  cached_chunks.append(buffer)
@@ -297,8 +298,16 @@ def request_generation(
297
  reasoning_closed = True
298
 
299
  if not saw_visible_output:
300
- cached_chunks.append("No visible output produced.")
301
- yield "No visible output produced."
 
 
 
 
 
 
 
 
302
  if chunk.choices[0].finish_reason == "error":
303
  cached_chunks.append(f"Error: Unknown error")
304
  yield f"Error: Unknown error"
@@ -311,16 +320,6 @@ def request_generation(
311
  cached_chunks.append(buffer)
312
  yield buffer
313
 
314
- if output_type == "speech":
315
- tts_model = TTS_MODEL
316
- tts_inputs = AutoProcessor.from_pretrained(tts_model)(text=buffer, return_tensors="pt")
317
- tts_model_instance = ParlerTTSForConditionalGeneration.from_pretrained(tts_model)
318
- audio = tts_model_instance.generate(**tts_inputs)
319
- audio_file = io.BytesIO()
320
- torchaudio.save(audio_file, audio[0], sample_rate=22050, format="wav")
321
- audio_file.seek(0)
322
- yield audio_file.read()
323
-
324
  cache[cache_key] = cached_chunks
325
 
326
  except Exception as e:
@@ -343,12 +342,134 @@ def request_generation(
343
  input_type=input_type,
344
  audio_data=audio_data,
345
  image_data=image_data,
346
- output_type=output_type
347
  ):
348
  yield chunk
349
  return
350
- yield f"Error: Failed to load model {model_name}: {e}"
351
- return
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
352
 
353
  def format_final(analysis_text: str, visible_text: str) -> str:
354
  reasoning_safe = html.escape((analysis_text or "").strip())
@@ -364,12 +485,12 @@ def format_final(analysis_text: str, visible_text: str) -> str:
364
  f"{response}" if response else "No final response available."
365
  )
366
 
367
- def generate(message, history, system_prompt, temperature, reasoning_effort, enable_browsing, max_new_tokens, input_type="text", audio_data=None, image_data=None, model_choice=None, output_type="text"):
368
  if not message.strip() and not audio_data and not image_data:
369
  yield "Please enter a prompt, record audio, or capture an image."
370
  return
371
 
372
- model_name, api_endpoint = select_model(message, input_type=input_type, model_choice=model_choice)
373
  chat_history = []
374
  for h in history:
375
  if isinstance(h, dict):
@@ -398,7 +519,7 @@ def generate(message, history, system_prompt, temperature, reasoning_effort, ena
398
  "type": "function",
399
  "function": {
400
  "name": "code_generation",
401
- "description": "Generate or modify code for various frameworks",
402
  "parameters": {
403
  "type": "object",
404
  "properties": {
@@ -476,7 +597,6 @@ def generate(message, history, system_prompt, temperature, reasoning_effort, ena
476
  input_type=input_type,
477
  audio_data=audio_data,
478
  image_data=image_data,
479
- output_type=output_type
480
  )
481
 
482
  for chunk in stream:
 
13
  import io
14
  import torchaudio
15
  from PIL import Image
16
+ import numpy as np
17
  from transformers import CLIPModel, CLIPProcessor, AutoProcessor
18
  from parler_tts import ParlerTTSForConditionalGeneration
19
+ from utils.web_search import web_search # استيراد مباشر
20
 
21
  logger = logging.getLogger(__name__)
22
 
 
34
  # إعداد العميل لـ Hugging Face Inference API
35
  HF_TOKEN = os.getenv("HF_TOKEN")
36
  BACKUP_HF_TOKEN = os.getenv("BACKUP_HF_TOKEN")
37
+ API_ENDPOINT = os.getenv("API_ENDPOINT", "https://api-inference.huggingface.co")
38
+ FALLBACK_API_ENDPOINT = "https://api-inference.huggingface.co"
39
+ MODEL_NAME = os.getenv("MODEL_NAME", "mistralai/Mixtral-8x7B-Instruct-v0.1")
40
+ SECONDARY_MODEL_NAME = os.getenv("SECONDARY_MODEL_NAME", "meta-llama/Meta-Llama-3-8B-Instruct")
41
+ TERTIARY_MODEL_NAME = os.getenv("TERTIARY_MODEL_NAME", "mistralai/Mixtral-8x22B-Instruct-v0.1")
42
  CLIP_BASE_MODEL = os.getenv("CLIP_BASE_MODEL", "openai/clip-vit-base-patch32")
43
  CLIP_LARGE_MODEL = os.getenv("CLIP_LARGE_MODEL", "openai/clip-vit-large-patch14")
44
+ ASR_MODEL = os.getenv("ASR_MODEL", "openai/whisper-large-v3")
45
  TTS_MODEL = os.getenv("TTS_MODEL", "parler-tts/parler-tts-mini-v1")
46
 
47
  def check_model_availability(model_name: str, api_base: str, api_key: str) -> tuple[bool, str]:
 
65
  return check_model_availability(model_name, api_base, BACKUP_HF_TOKEN)
66
  return False, api_key
67
 
68
+ def select_model(query: str, input_type: str = "text") -> tuple[str, str]:
 
 
 
 
69
  query_lower = query.lower()
70
  if input_type == "audio" or any(keyword in query_lower for keyword in ["voice", "audio", "speech", "صوت", "تحويل صوت"]):
71
  logger.info(f"Selected {ASR_MODEL} with endpoint {FALLBACK_API_ENDPOINT} for audio input")
 
101
  input_type: str = "text",
102
  audio_data: Optional[bytes] = None,
103
  image_data: Optional[bytes] = None,
 
104
  ) -> Generator[bytes | str, None, None]:
105
+ # التحقق من توفر النموذج
106
  is_available, selected_api_key = check_model_availability(model_name, api_base, api_key)
107
  if not is_available:
108
  yield f"Error: Model {model_name} is not available. Please check the model endpoint or token."
109
  return
110
 
111
+ # إنشاء مفتاح للـ cache
112
  cache_key = hashlib.md5(json.dumps({
113
  "message": message,
114
  "system_prompt": system_prompt,
 
132
  if model_name == ASR_MODEL and audio_data is not None:
133
  task_type = "audio_transcription"
134
  try:
135
+ audio_file = io.BytesIO(audio_data if isinstance(audio_data, bytes) else audio_data.tobytes())
136
  audio = pydub.AudioSegment.from_file(audio_file)
137
  audio = audio.set_channels(1).set_frame_rate(16000)
138
  audio_file = io.BytesIO()
 
144
  response_format="text"
145
  )
146
  yield transcription
 
 
 
 
 
 
 
 
 
147
  cache[cache_key] = [transcription]
148
  return
149
  except Exception as e:
 
152
  return
153
 
154
  # معالجة تحويل النص إلى صوت (TTS)
155
+ if model_name == TTS_MODEL:
156
  task_type = "text_to_speech"
157
  try:
158
+ model = ParlerTTSForConditionalGeneration.from_pretrained(model_name)
159
+ processor = AutoProcessor.from_pretrained(model_name)
160
  inputs = processor(text=message, return_tensors="pt")
161
  audio = model.generate(**inputs)
162
  audio_file = io.BytesIO()
 
176
  try:
177
  model = CLIPModel.from_pretrained(model_name)
178
  processor = CLIPProcessor.from_pretrained(model_name)
179
+ image = Image.fromarray(np.uint8(image_data)) if isinstance(image_data, np.ndarray) else Image.open(io.BytesIO(image_data)).convert("RGB")
180
  inputs = processor(text=message, images=image, return_tensors="pt", padding=True)
181
  outputs = model(**inputs)
182
  logits_per_image = outputs.logits_per_image
183
  probs = logits_per_image.softmax(dim=1)
184
+ yield f"Image analysis result: {probs.tolist()}"
185
+ cache[cache_key] = [f"Image analysis result: {probs.tolist()}"]
 
 
 
 
 
 
 
 
 
 
186
  return
187
  except Exception as e:
188
  logger.error(f"Image analysis failed: {e}")
 
192
  # تحسين system_prompt بناءً على نوع المهمة
193
  if model_name in [CLIP_BASE_MODEL, CLIP_LARGE_MODEL]:
194
  task_type = "image"
195
+ enhanced_system_prompt = f"{system_prompt}\nYou are an expert in image analysis and description. Provide detailed descriptions, classifications, or analysis of images based on the query. Continue until the query is fully addressed."
196
  elif any(keyword in message.lower() for keyword in ["code", "programming", "python", "javascript", "react", "django", "flask"]):
197
  task_type = "code"
198
+ enhanced_system_prompt = f"{system_prompt}\nYou are an expert programmer. Provide accurate, well-commented code with comprehensive examples and detailed explanations. Support frameworks like React, Django, Flask, and others. Format code with triple backticks (```) and specify the language. Continue until the task is fully addressed."
199
  elif any(keyword in message.lower() for keyword in ["analyze", "analysis", "تحليل"]):
200
  task_type = "analysis"
201
+ enhanced_system_prompt = f"{system_prompt}\nProvide detailed analysis with step-by-step reasoning, examples, and data-driven insights. Continue until all aspects of the query are thoroughly covered."
202
+ elif any(keyword in message.lower() for keyword in ["review", "مراجعة"]):
203
+ task_type = "review"
204
+ enhanced_system_prompt = f"{system_prompt}\nReview the provided content thoroughly, identify issues, and suggest improvements with detailed explanations. Ensure the response is complete and detailed."
205
+ elif any(keyword in message.lower() for keyword in ["publish", "نشر"]):
206
+ task_type = "publish"
207
+ enhanced_system_prompt = f"{system_prompt}\nPrepare content for publishing, ensuring clarity, professionalism, and adherence to best practices. Provide a complete and detailed response."
208
  else:
209
+ enhanced_system_prompt = f"{system_prompt}\nFor general queries, provide comprehensive, detailed responses with examples and explanations where applicable. Continue generating content until the query is fully answered, leveraging the full capacity of the model."
210
+
211
+ if len(message.split()) < 5:
212
+ enhanced_system_prompt += "\nEven for short or general queries, provide a detailed, in-depth response with examples, explanations, and additional context to ensure completeness."
213
 
214
+ logger.info(f"Task type detected: {task_type}")
215
  input_messages: List[dict] = [{"role": "system", "content": enhanced_system_prompt}]
216
  if chat_history:
217
  for msg in chat_history:
 
247
  reasoning_started = False
248
  reasoning_closed = False
249
  saw_visible_output = False
250
+ last_tool_name = None
251
+ last_tool_args = None
252
  buffer = ""
253
 
254
  for chunk in stream:
 
276
  buffer = ""
277
  continue
278
 
279
+ if chunk.choices[0].delta.tool_calls and model_name in [MODEL_NAME, SECONDARY_MODEL_NAME, TERTIARY_MODEL_NAME]:
280
+ tool_call = chunk.choices[0].delta.tool_calls[0]
281
+ name = getattr(tool_call, "function", {}).get("name", None)
282
+ args = getattr(tool_call, "function", {}).get("arguments", None)
283
+ if name:
284
+ last_tool_name = name
285
+ if args:
286
+ last_tool_args = args
287
+ continue
288
+
289
  if chunk.choices[0].finish_reason in ("stop", "tool_calls", "error", "length"):
290
  if buffer:
291
  cached_chunks.append(buffer)
 
298
  reasoning_closed = True
299
 
300
  if not saw_visible_output:
301
+ msg = "I attempted to call a tool, but tools aren't executed in this environment, so no final answer was produced."
302
+ if last_tool_name:
303
+ try:
304
+ args_text = json.dumps(last_tool_args, ensure_ascii=False, default=str)
305
+ except Exception:
306
+ args_text = str(last_tool_args)
307
+ msg += f"\n\n• Tool requested: **{last_tool_name}**\n• Arguments: `{args_text}`"
308
+ cached_chunks.append(msg)
309
+ yield msg
310
+
311
  if chunk.choices[0].finish_reason == "error":
312
  cached_chunks.append(f"Error: Unknown error")
313
  yield f"Error: Unknown error"
 
320
  cached_chunks.append(buffer)
321
  yield buffer
322
 
 
 
 
 
 
 
 
 
 
 
323
  cache[cache_key] = cached_chunks
324
 
325
  except Exception as e:
 
342
  input_type=input_type,
343
  audio_data=audio_data,
344
  image_data=image_data,
 
345
  ):
346
  yield chunk
347
  return
348
+ if model_name == MODEL_NAME:
349
+ fallback_model = SECONDARY_MODEL_NAME
350
+ fallback_endpoint = FALLBACK_API_ENDPOINT
351
+ logger.info(f"Retrying with fallback model: {fallback_model} on {fallback_endpoint}")
352
+ try:
353
+ is_available, selected_api_key = check_model_availability(fallback_model, fallback_endpoint, selected_api_key)
354
+ if not is_available:
355
+ yield f"Error: Fallback model {fallback_model} is not available."
356
+ return
357
+ client = OpenAI(api_key=selected_api_key, base_url=fallback_endpoint, timeout=120.0)
358
+ stream = client.chat.completions.create(
359
+ model=fallback_model,
360
+ messages=input_messages,
361
+ temperature=temperature,
362
+ max_tokens=max_new_tokens,
363
+ stream=True,
364
+ tools=[],
365
+ tool_choice="none",
366
+ )
367
+ for chunk in stream:
368
+ if chunk.choices[0].delta.content:
369
+ content = chunk.choices[0].delta.content
370
+ if content == "<|channel|>analysis<|message|>":
371
+ if not reasoning_started:
372
+ cached_chunks.append("analysis")
373
+ yield "analysis"
374
+ reasoning_started = True
375
+ continue
376
+ if content == "<|channel|>final<|message|>":
377
+ if reasoning_started and not reasoning_closed:
378
+ cached_chunks.append("assistantfinal")
379
+ yield "assistantfinal"
380
+ reasoning_closed = True
381
+ continue
382
+
383
+ saw_visible_output = True
384
+ buffer += content
385
+
386
+ if "\n" in buffer or len(buffer) > 5000:
387
+ cached_chunks.append(buffer)
388
+ yield buffer
389
+ buffer = ""
390
+ continue
391
+
392
+ if chunk.choices[0].finish_reason in ("stop", "error", "length"):
393
+ if buffer:
394
+ cached_chunks.append(buffer)
395
+ yield buffer
396
+ buffer = ""
397
+
398
+ if reasoning_started and not reasoning_closed:
399
+ cached_chunks.append("assistantfinal")
400
+ yield "assistantfinal"
401
+ reasoning_closed = True
402
+
403
+ if not saw_visible_output:
404
+ cached_chunks.append("No visible output produced.")
405
+ yield "No visible output produced."
406
+ if chunk.choices[0].finish_reason == "error":
407
+ cached_chunks.append(f"Error: Unknown error with fallback model {fallback_model}")
408
+ yield f"Error: Unknown error with fallback model {fallback_model}"
409
+ elif chunk.choices[0].finish_reason == "length":
410
+ cached_chunks.append("Response truncated due to token limit. Please refine your query or request continuation.")
411
+ yield "Response truncated due to token limit. Please refine your query or request continuation."
412
+ break
413
+
414
+ if buffer:
415
+ cached_chunks.append(buffer)
416
+ yield buffer
417
+
418
+ cache[cache_key] = cached_chunks
419
+
420
+ except Exception as e2:
421
+ logger.exception(f"[Gateway] Streaming failed for fallback model {fallback_model}: {e2}")
422
+ try:
423
+ is_available, selected_api_key = check_model_availability(TERTIARY_MODEL_NAME, FALLBACK_API_ENDPOINT, selected_api_key)
424
+ if not is_available:
425
+ yield f"Error: Tertiary model {TERTIARY_MODEL_NAME} is not available."
426
+ return
427
+ client = OpenAI(api_key=selected_api_key, base_url=FALLBACK_API_ENDPOINT, timeout=120.0)
428
+ stream = client.chat.completions.create(
429
+ model=TERTIARY_MODEL_NAME,
430
+ messages=input_messages,
431
+ temperature=temperature,
432
+ max_tokens=max_new_tokens,
433
+ stream=True,
434
+ tools=[],
435
+ tool_choice="none",
436
+ )
437
+ for chunk in stream:
438
+ if chunk.choices[0].delta.content:
439
+ content = chunk.choices[0].delta.content
440
+ saw_visible_output = True
441
+ buffer += content
442
+ if "\n" in buffer or len(buffer) > 5000:
443
+ cached_chunks.append(buffer)
444
+ yield buffer
445
+ buffer = ""
446
+ continue
447
+ if chunk.choices[0].finish_reason in ("stop", "error", "length"):
448
+ if buffer:
449
+ cached_chunks.append(buffer)
450
+ yield buffer
451
+ buffer = ""
452
+ if not saw_visible_output:
453
+ cached_chunks.append("No visible output produced.")
454
+ yield "No visible output produced."
455
+ if chunk.choices[0].finish_reason == "error":
456
+ cached_chunks.append(f"Error: Unknown error with tertiary model {TERTIARY_MODEL_NAME}")
457
+ yield f"Error: Unknown error with tertiary model {TERTIARY_MODEL_NAME}"
458
+ elif chunk.choices[0].finish_reason == "length":
459
+ cached_chunks.append("Response truncated due to token limit. Please refine your query or request continuation.")
460
+ yield "Response truncated due to token limit. Please refine your query or request continuation."
461
+ break
462
+ if buffer:
463
+ cached_chunks.append(buffer)
464
+ yield buffer
465
+ cache[cache_key] = cached_chunks
466
+ except Exception as e3:
467
+ logger.exception(f"[Gateway] Streaming failed for tertiary model {TERTIARY_MODEL_NAME}: {e3}")
468
+ yield f"Error: Failed to load all models: Primary ({model_name}), Secondary ({fallback_model}), Tertiary ({TERTIARY_MODEL_NAME}). Please check your model configurations."
469
+ return
470
+ else:
471
+ yield f"Error: Failed to load model {model_name}: {e}"
472
+ return
473
 
474
  def format_final(analysis_text: str, visible_text: str) -> str:
475
  reasoning_safe = html.escape((analysis_text or "").strip())
 
485
  f"{response}" if response else "No final response available."
486
  )
487
 
488
+ def generate(message, history, system_prompt, temperature, reasoning_effort, enable_browsing, max_new_tokens, input_type="text", audio_data=None, image_data=None):
489
  if not message.strip() and not audio_data and not image_data:
490
  yield "Please enter a prompt, record audio, or capture an image."
491
  return
492
 
493
+ model_name, api_endpoint = select_model(message, input_type=input_type)
494
  chat_history = []
495
  for h in history:
496
  if isinstance(h, dict):
 
519
  "type": "function",
520
  "function": {
521
  "name": "code_generation",
522
+ "description": "Generate or modify code for various frameworks (React, Django, Flask, etc.)",
523
  "parameters": {
524
  "type": "object",
525
  "properties": {
 
597
  input_type=input_type,
598
  audio_data=audio_data,
599
  image_data=image_data,
 
600
  )
601
 
602
  for chunk in stream:
utils/web_search.py CHANGED
@@ -10,29 +10,31 @@ def web_search(query: str) -> str:
10
  google_api_key = os.getenv("GOOGLE_API_KEY")
11
  google_cse_id = os.getenv("GOOGLE_CSE_ID")
12
  if not google_api_key or not google_cse_id:
 
13
  return "Web search requires GOOGLE_API_KEY and GOOGLE_CSE_ID to be set."
14
  url = f"https://www.googleapis.com/customsearch/v1?key={google_api_key}&cx={google_cse_id}&q={query}"
15
- response = requests.get(url, timeout=5)
16
  response.raise_for_status()
17
  results = response.json().get("items", [])
18
  if not results:
 
19
  return "No web results found."
20
  search_results = []
21
- for i, item in enumerate(results[:3]): # قللنا العدد لتسريع البحث
22
  title = item.get("title", "")
23
  snippet = item.get("snippet", "")
24
  link = item.get("link", "")
25
  try:
26
- page_response = requests.get(link, timeout=3)
27
  page_response.raise_for_status()
28
  soup = BeautifulSoup(page_response.text, "html.parser")
29
  paragraphs = soup.find_all("p")
30
- page_content = " ".join([p.get_text() for p in paragraphs][:500])
31
  except Exception as e:
32
  logger.warning(f"Failed to fetch page content for {link}: {e}")
33
  page_content = snippet
34
  search_results.append(f"Result {i+1}:\nTitle: {title}\nLink: {link}\nContent: {page_content}\n")
35
  return "\n".join(search_results)
36
  except Exception as e:
37
- logger.exception("Web search failed")
38
  return f"Web search error: {e}"
 
10
  google_api_key = os.getenv("GOOGLE_API_KEY")
11
  google_cse_id = os.getenv("GOOGLE_CSE_ID")
12
  if not google_api_key or not google_cse_id:
13
+ logger.warning("GOOGLE_API_KEY or GOOGLE_CSE_ID not set.")
14
  return "Web search requires GOOGLE_API_KEY and GOOGLE_CSE_ID to be set."
15
  url = f"https://www.googleapis.com/customsearch/v1?key={google_api_key}&cx={google_cse_id}&q={query}"
16
+ response = requests.get(url, timeout=10)
17
  response.raise_for_status()
18
  results = response.json().get("items", [])
19
  if not results:
20
+ logger.info(f"No web results found for query: {query}")
21
  return "No web results found."
22
  search_results = []
23
+ for i, item in enumerate(results[:5]):
24
  title = item.get("title", "")
25
  snippet = item.get("snippet", "")
26
  link = item.get("link", "")
27
  try:
28
+ page_response = requests.get(link, timeout=5)
29
  page_response.raise_for_status()
30
  soup = BeautifulSoup(page_response.text, "html.parser")
31
  paragraphs = soup.find_all("p")
32
+ page_content = " ".join([p.get_text() for p in paragraphs][:1000])
33
  except Exception as e:
34
  logger.warning(f"Failed to fetch page content for {link}: {e}")
35
  page_content = snippet
36
  search_results.append(f"Result {i+1}:\nTitle: {title}\nLink: {link}\nContent: {page_content}\n")
37
  return "\n".join(search_results)
38
  except Exception as e:
39
+ logger.exception(f"Web search failed for query: {query}")
40
  return f"Web search error: {e}"