MGZON committed on
Commit
6346369
·
1 Parent(s): 27444a4

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +156 -30
main.py CHANGED
@@ -9,6 +9,17 @@ import requests
9
  from bs4 import BeautifulSoup
10
  import re
11
  from tenacity import retry, stop_after_attempt, wait_exponential
 
 
 
 
 
 
 
 
 
 
 
12
 
13
  # تعريف LATEX_DELIMS
14
  LATEX_DELIMS = [
@@ -22,7 +33,7 @@ LATEX_DELIMS = [
22
  logging.basicConfig(level=logging.INFO)
23
  logger = logging.getLogger(__name__)
24
 
25
- # تحقق من الملفات في /app/ (للتصحيح)
26
  logger.info("Files in /app/: %s", os.listdir("/app"))
27
 
28
  # إعداد العميل لـ Hugging Face Inference API
@@ -30,7 +41,9 @@ HF_TOKEN = os.getenv("HF_TOKEN")
30
  API_ENDPOINT = os.getenv("API_ENDPOINT", "https://router.huggingface.co/v1")
31
  FALLBACK_API_ENDPOINT = "https://api-inference.huggingface.co/v1"
32
  MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-20b:fireworks-ai")
33
- SECONDARY_MODEL_NAME = os.getenv("SECONDARY_MODEL_NAME", "mistralai/Mixtral-8x7B-Instruct-v0.1")
 
 
34
  if not HF_TOKEN:
35
  logger.error("HF_TOKEN is not set in environment variables.")
36
  raise ValueError("HF_TOKEN is required for Inference API.")
@@ -39,6 +52,20 @@ if not HF_TOKEN:
39
  QUEUE_SIZE = int(os.getenv("QUEUE_SIZE", 80))
40
  CONCURRENCY_LIMIT = int(os.getenv("CONCURRENCY_LIMIT", 20))
41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  # دالة بحث ويب محسنة
43
  def web_search(query: str) -> str:
44
  try:
@@ -46,16 +73,14 @@ def web_search(query: str) -> str:
46
  google_cse_id = os.getenv("GOOGLE_CSE_ID")
47
  if not google_api_key or not google_cse_id:
48
  return "Web search requires GOOGLE_API_KEY and GOOGLE_CSE_ID to be set."
49
-
50
- url = f"https://www.googleapis.com/customsearch/v1?key={google_api_key}&cx={google_cse_id}&q={query}"
51
- response = requests.get(url)
52
  response.raise_for_status()
53
  results = response.json().get("items", [])
54
  if not results:
55
  return "No web results found."
56
-
57
  search_results = []
58
- for i, item in enumerate(results[:3]):
59
  title = item.get("title", "")
60
  snippet = item.get("snippet", "")
61
  link = item.get("link", "")
@@ -64,18 +89,17 @@ def web_search(query: str) -> str:
64
  page_response.raise_for_status()
65
  soup = BeautifulSoup(page_response.text, "html.parser")
66
  paragraphs = soup.find_all("p")
67
- page_content = " ".join([p.get_text() for p in paragraphs][:500])
68
  except Exception as e:
69
  logger.warning(f"Failed to fetch page content for {link}: {e}")
70
  page_content = snippet
71
  search_results.append(f"Result {i+1}:\nTitle: {title}\nLink: {link}\nContent: {page_content}\n")
72
-
73
  return "\n".join(search_results)
74
  except Exception as e:
75
  logger.exception("Web search failed")
76
  return f"Web search error: {e}"
77
 
78
- # دالة request_generation مع retry
79
  @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
80
  def request_generation(
81
  api_key: str,
@@ -85,15 +109,13 @@ def request_generation(
85
  model_name: str,
86
  chat_history: Optional[List[dict]] = None,
87
  temperature: float = 0.9,
88
- max_new_tokens: int = 4096, # زيادة لحد 4096
89
  reasoning_effort: str = "off",
90
  tools: Optional[List[dict]] = None,
91
  tool_choice: Optional[str] = None,
92
  deep_search: bool = False,
93
  ) -> Generator[str, None, None]:
94
- """Streams Responses API events."""
95
- client = OpenAI(api_key=api_key, base_url=api_base)
96
-
97
  task_type = "general"
98
  if "code" in message.lower() or "programming" in message.lower() or any(ext in message.lower() for ext in ["python", "javascript", "react", "django", "flask"]):
99
  task_type = "code"
@@ -108,10 +130,9 @@ def request_generation(
108
  task_type = "publish"
109
  enhanced_system_prompt = f"{system_prompt}\nPrepare content for publishing, ensuring clarity, professionalism, and adherence to best practices."
110
  else:
111
- enhanced_system_prompt = f"{system_prompt}\nPlease provide detailed and comprehensive responses, including explanations, examples, and relevant details where applicable."
112
 
113
  logger.info(f"Task type detected: {task_type}")
114
-
115
  input_messages: List[dict] = [{"role": "system", "content": enhanced_system_prompt}]
116
  if chat_history:
117
  for msg in chat_history:
@@ -163,7 +184,7 @@ def request_generation(
163
  saw_visible_output = True
164
  buffer += content
165
 
166
- if "\n" in buffer or len(buffer) > 500: # زيادة الـ buffer لحد 500
167
  yield buffer
168
  buffer = ""
169
  continue
@@ -211,7 +232,7 @@ def request_generation(
211
  fallback_endpoint = FALLBACK_API_ENDPOINT
212
  logger.info(f"Retrying with fallback model: {fallback_model} on {fallback_endpoint}")
213
  try:
214
- client = OpenAI(api_key=api_key, base_url=fallback_endpoint)
215
  stream = client.chat.completions.create(
216
  model=fallback_model,
217
  messages=input_messages,
@@ -238,7 +259,7 @@ def request_generation(
238
  saw_visible_output = True
239
  buffer += content
240
 
241
- if "\n" in buffer or len(buffer) > 500:
242
  yield buffer
243
  buffer = ""
244
  continue
@@ -264,12 +285,46 @@ def request_generation(
264
  except Exception as e2:
265
  logger.exception(f"[Gateway] Streaming failed for fallback model {fallback_model}: {e2}")
266
  yield f"Error: Failed to load both models ({model_name} and {fallback_model}): {e2}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
267
  else:
268
  yield f"Error: Failed to load model {model_name}: {e}"
269
 
270
  # وظيفة التنسيق النهائي
271
  def format_final(analysis_text: str, visible_text: str) -> str:
272
- """Render final message with collapsible analysis + normal Markdown answer."""
273
  reasoning_safe = html.escape((analysis_text or "").strip())
274
  response = (visible_text or "").strip()
275
  return (
@@ -287,7 +342,7 @@ def generate(message, history, system_prompt, temperature, reasoning_effort, ena
287
  yield "Please enter a prompt."
288
  return
289
 
290
- # Flatten gradio history وتنظيف metadata
291
  chat_history = []
292
  for h in history:
293
  if isinstance(h, dict):
@@ -299,7 +354,6 @@ def generate(message, history, system_prompt, temperature, reasoning_effort, ena
299
  if u: chat_history.append({"role": "user", "content": u})
300
  if a: chat_history.append({"role": "assistant", "content": a})
301
 
302
- # إعداد الأدوات
303
  tools = [
304
  {
305
  "type": "function",
@@ -329,8 +383,8 @@ def generate(message, history, system_prompt, temperature, reasoning_effort, ena
329
  },
330
  },
331
  }
332
- ] if "gpt-oss" in MODEL_NAME else []
333
- tool_choice = "auto" if "gpt-oss" in MODEL_NAME else "none"
334
 
335
  in_analysis = False
336
  in_visible = False
@@ -352,10 +406,10 @@ def generate(message, history, system_prompt, temperature, reasoning_effort, ena
352
  try:
353
  stream = request_generation(
354
  api_key=HF_TOKEN,
355
- api_base=API_ENDPOINT,
356
  message=message,
357
  system_prompt=system_prompt,
358
- model_name=MODEL_NAME,
359
  chat_history=chat_history,
360
  temperature=temperature,
361
  max_new_tokens=max_new_tokens,
@@ -422,7 +476,7 @@ chatbot_ui = gr.ChatInterface(
422
  gr.Slider(label="Temperature", minimum=0.0, maximum=1.0, step=0.1, value=0.9),
423
  gr.Radio(label="Reasoning Effort", choices=["low", "medium", "high"], value="medium"),
424
  gr.Checkbox(label="Enable DeepSearch (web browsing)", value=True),
425
- gr.Slider(label="Max New Tokens", minimum=50, maximum=4096, step=50, value=4096),
426
  ],
427
  stop_btn="Stop",
428
  examples=[
@@ -434,6 +488,7 @@ chatbot_ui = gr.ChatInterface(
434
  ["Create a Flask route for user authentication."],
435
  ["What are the latest trends in AI?"],
436
  ["Provide guidelines for publishing a technical blog post."],
 
437
  ],
438
  title="MGZon Chatbot",
439
  description="A versatile chatbot powered by GPT-OSS-20B and a fine-tuned model for MGZon queries. Supports code generation, analysis, review, web search, and MGZon-specific queries. Licensed under Apache 2.0. ***DISCLAIMER:*** Analysis may contain internal thoughts not suitable for final response.",
@@ -442,12 +497,83 @@ chatbot_ui = gr.ChatInterface(
442
  )
443
 
444
  # دمج FastAPI مع Gradio
445
- from fastapi import FastAPI
446
- from gradio import mount_gradio_app
447
-
448
  app = FastAPI(title="MGZon Chatbot API")
449
  app = mount_gradio_app(app, chatbot_ui, path="/")
450
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
451
  # تشغيل الخادم
452
  if __name__ == "__main__":
453
  import uvicorn
 
9
  from bs4 import BeautifulSoup
10
  import re
11
  from tenacity import retry, stop_after_attempt, wait_exponential
12
+ from fastapi import FastAPI
13
+ from pydantic import BaseModel
14
+
15
# Request schema for the JSON chat API endpoint.
class QueryRequest(BaseModel):
    # The user prompt to answer.
    message: str
    # System prompt prepended to the conversation.
    system_prompt: str = "You are a helpful assistant capable of code generation, analysis, review, and more."
    # Prior turns as a list of {"role", "content"} dicts, if any.
    history: Optional[List[dict]] = None
    # Sampling temperature forwarded to the inference call.
    temperature: float = 0.9
    # Upper bound on generated tokens forwarded to the inference call.
    max_new_tokens: int = 128000
    # When True, enables the DeepSearch (web browsing) path.
    enable_browsing: bool = False
 
24
  # تعريف LATEX_DELIMS
25
  LATEX_DELIMS = [
 
33
  logging.basicConfig(level=logging.INFO)
34
  logger = logging.getLogger(__name__)
35
 
36
+ # تحقق من الملفات في /app/
37
  logger.info("Files in /app/: %s", os.listdir("/app"))
38
 
39
  # إعداد العميل لـ Hugging Face Inference API
 
41
  API_ENDPOINT = os.getenv("API_ENDPOINT", "https://router.huggingface.co/v1")
42
  FALLBACK_API_ENDPOINT = "https://api-inference.huggingface.co/v1"
43
  MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-20b:fireworks-ai")
44
+ SECONDARY_MODEL_NAME = os.getenv("SECONDARY_MODEL_NAME", "MGZON/Veltrix")
45
+ TERTIARY_MODEL_NAME = os.getenv("TERTIARY_MODEL_NAME", "mistralai/Mixtral-8x7B-Instruct-v0.1")
46
+
47
  if not HF_TOKEN:
48
  logger.error("HF_TOKEN is not set in environment variables.")
49
  raise ValueError("HF_TOKEN is required for Inference API.")
 
52
  QUEUE_SIZE = int(os.getenv("QUEUE_SIZE", 80))
53
  CONCURRENCY_LIMIT = int(os.getenv("CONCURRENCY_LIMIT", 20))
54
 
55
# Model-routing helper
def select_model(query: str) -> tuple[str, str]:
    """Pick the (model_name, api_endpoint) pair for a query.

    MGZon/OAuth-related queries are routed to the fine-tuned secondary
    model on the fallback endpoint; everything else goes to the primary
    model on the main endpoint.
    """
    query_lower = query.lower()
    # One compiled alternation instead of looping over separate patterns.
    # The original second pattern (r"\bmgzon\s+(products|...)\b") was fully
    # subsumed by r"\bmgzon\b", so dropping it does not change which
    # queries match. IGNORECASE is kept for the Latin "mgzon"/"oauth"
    # tokens even though the query is pre-lowercased.
    mgzon_re = re.compile(
        r"\bmgzon\b|\bميزات\s+mgzon\b|\bخدمات\s+mgzon\b|\boauth\b",
        re.IGNORECASE,
    )
    if mgzon_re.search(query_lower):
        logger.info(f"Selected {SECONDARY_MODEL_NAME} with endpoint {FALLBACK_API_ENDPOINT} for MGZon-related query: {query}")
        return SECONDARY_MODEL_NAME, FALLBACK_API_ENDPOINT
    logger.info(f"Selected {MODEL_NAME} with endpoint {API_ENDPOINT} for general query: {query}")
    return MODEL_NAME, API_ENDPOINT
68
+
69
  # دالة بحث ويب محسنة
70
  def web_search(query: str) -> str:
71
  try:
 
73
  google_cse_id = os.getenv("GOOGLE_CSE_ID")
74
  if not google_api_key or not google_cse_id:
75
  return "Web search requires GOOGLE_API_KEY and GOOGLE_CSE_ID to be set."
76
+ url = f"https://www.googleapis.com/customsearch/v1?key={google_api_key}&cx={google_cse_id}&q={query}+site:mgzon.com"
77
+ response = requests.get(url, timeout=10)
 
78
  response.raise_for_status()
79
  results = response.json().get("items", [])
80
  if not results:
81
  return "No web results found."
 
82
  search_results = []
83
+ for i, item in enumerate(results[:5]):
84
  title = item.get("title", "")
85
  snippet = item.get("snippet", "")
86
  link = item.get("link", "")
 
89
  page_response.raise_for_status()
90
  soup = BeautifulSoup(page_response.text, "html.parser")
91
  paragraphs = soup.find_all("p")
92
+ page_content = " ".join([p.get_text() for p in paragraphs][:1000])
93
  except Exception as e:
94
  logger.warning(f"Failed to fetch page content for {link}: {e}")
95
  page_content = snippet
96
  search_results.append(f"Result {i+1}:\nTitle: {title}\nLink: {link}\nContent: {page_content}\n")
 
97
  return "\n".join(search_results)
98
  except Exception as e:
99
  logger.exception("Web search failed")
100
  return f"Web search error: {e}"
101
 
102
+ # دالة request_generation
103
  @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
104
  def request_generation(
105
  api_key: str,
 
109
  model_name: str,
110
  chat_history: Optional[List[dict]] = None,
111
  temperature: float = 0.9,
112
+ max_new_tokens: int = 128000,
113
  reasoning_effort: str = "off",
114
  tools: Optional[List[dict]] = None,
115
  tool_choice: Optional[str] = None,
116
  deep_search: bool = False,
117
  ) -> Generator[str, None, None]:
118
+ client = OpenAI(api_key=api_key, base_url=api_base, timeout=60.0)
 
 
119
  task_type = "general"
120
  if "code" in message.lower() or "programming" in message.lower() or any(ext in message.lower() for ext in ["python", "javascript", "react", "django", "flask"]):
121
  task_type = "code"
 
130
  task_type = "publish"
131
  enhanced_system_prompt = f"{system_prompt}\nPrepare content for publishing, ensuring clarity, professionalism, and adherence to best practices."
132
  else:
133
+ enhanced_system_prompt = system_prompt
134
 
135
  logger.info(f"Task type detected: {task_type}")
 
136
  input_messages: List[dict] = [{"role": "system", "content": enhanced_system_prompt}]
137
  if chat_history:
138
  for msg in chat_history:
 
184
  saw_visible_output = True
185
  buffer += content
186
 
187
+ if "\n" in buffer or len(buffer) > 2000:
188
  yield buffer
189
  buffer = ""
190
  continue
 
232
  fallback_endpoint = FALLBACK_API_ENDPOINT
233
  logger.info(f"Retrying with fallback model: {fallback_model} on {fallback_endpoint}")
234
  try:
235
+ client = OpenAI(api_key=api_key, base_url=fallback_endpoint, timeout=60.0)
236
  stream = client.chat.completions.create(
237
  model=fallback_model,
238
  messages=input_messages,
 
259
  saw_visible_output = True
260
  buffer += content
261
 
262
+ if "\n" in buffer or len(buffer) > 2000:
263
  yield buffer
264
  buffer = ""
265
  continue
 
285
  except Exception as e2:
286
  logger.exception(f"[Gateway] Streaming failed for fallback model {fallback_model}: {e2}")
287
  yield f"Error: Failed to load both models ({model_name} and {fallback_model}): {e2}"
288
+ # تجربة النموذج الثالث
289
+ try:
290
+ client = OpenAI(api_key=api_key, base_url=FALLBACK_API_ENDPOINT, timeout=60.0)
291
+ stream = client.chat.completions.create(
292
+ model=TERTIARY_MODEL_NAME,
293
+ messages=input_messages,
294
+ temperature=temperature,
295
+ max_tokens=max_new_tokens,
296
+ stream=True,
297
+ tools=[],
298
+ tool_choice="none",
299
+ )
300
+ for chunk in stream:
301
+ if chunk.choices[0].delta.content:
302
+ content = chunk.choices[0].delta.content
303
+ saw_visible_output = True
304
+ buffer += content
305
+ if "\n" in buffer or len(buffer) > 2000:
306
+ yield buffer
307
+ buffer = ""
308
+ continue
309
+ if chunk.choices[0].finish_reason in ("stop", "error"):
310
+ if buffer:
311
+ yield buffer
312
+ buffer = ""
313
+ if not saw_visible_output:
314
+ yield "No visible output produced."
315
+ if chunk.choices[0].finish_reason == "error":
316
+ yield f"Error: Unknown error with tertiary model {TERTIARY_MODEL_NAME}"
317
+ break
318
+ if buffer:
319
+ yield buffer
320
+ except Exception as e3:
321
+ logger.exception(f"[Gateway] Streaming failed for tertiary model {TERTIARY_MODEL_NAME}: {e3}")
322
+ yield f"Error: Failed to load all models: {e3}"
323
  else:
324
  yield f"Error: Failed to load model {model_name}: {e}"
325
 
326
  # وظيفة التنسيق النهائي
327
  def format_final(analysis_text: str, visible_text: str) -> str:
 
328
  reasoning_safe = html.escape((analysis_text or "").strip())
329
  response = (visible_text or "").strip()
330
  return (
 
342
  yield "Please enter a prompt."
343
  return
344
 
345
+ model_name, api_endpoint = select_model(message)
346
  chat_history = []
347
  for h in history:
348
  if isinstance(h, dict):
 
354
  if u: chat_history.append({"role": "user", "content": u})
355
  if a: chat_history.append({"role": "assistant", "content": a})
356
 
 
357
  tools = [
358
  {
359
  "type": "function",
 
383
  },
384
  },
385
  }
386
+ ] if "gpt-oss" in model_name else []
387
+ tool_choice = "auto" if "gpt-oss" in model_name else "none"
388
 
389
  in_analysis = False
390
  in_visible = False
 
406
  try:
407
  stream = request_generation(
408
  api_key=HF_TOKEN,
409
+ api_base=api_endpoint,
410
  message=message,
411
  system_prompt=system_prompt,
412
+ model_name=model_name,
413
  chat_history=chat_history,
414
  temperature=temperature,
415
  max_new_tokens=max_new_tokens,
 
476
  gr.Slider(label="Temperature", minimum=0.0, maximum=1.0, step=0.1, value=0.9),
477
  gr.Radio(label="Reasoning Effort", choices=["low", "medium", "high"], value="medium"),
478
  gr.Checkbox(label="Enable DeepSearch (web browsing)", value=True),
479
+ gr.Slider(label="Max New Tokens", minimum=50, maximum=128000, step=50, value=4096),
480
  ],
481
  stop_btn="Stop",
482
  examples=[
 
488
  ["Create a Flask route for user authentication."],
489
  ["What are the latest trends in AI?"],
490
  ["Provide guidelines for publishing a technical blog post."],
491
+ ["Who is the founder of MGZon?"],
492
  ],
493
  title="MGZon Chatbot",
494
  description="A versatile chatbot powered by GPT-OSS-20B and a fine-tuned model for MGZon queries. Supports code generation, analysis, review, web search, and MGZon-specific queries. Licensed under Apache 2.0. ***DISCLAIMER:*** Analysis may contain internal thoughts not suitable for final response.",
 
497
  )
498
 
499
  # دمج FastAPI مع Gradio
 
 
 
500
  app = FastAPI(title="MGZon Chatbot API")
501
  app = mount_gradio_app(app, chatbot_ui, path="/")
502
 
503
# API endpoints
@app.get("/api/model-info")
def model_info():
    """Report the configured model names, the primary API base, and status."""
    info = {
        "model_name": MODEL_NAME,
        "secondary_model": SECONDARY_MODEL_NAME,
        "tertiary_model": TERTIARY_MODEL_NAME,
        "api_base": API_ENDPOINT,
        "status": "online",
    }
    return info
513
+
514
+ @app.post("/api/chat")
515
+ async def chat_endpoint(req: QueryRequest):
516
+ model_name, api_endpoint = select_model(req.message)
517
+ stream = request_generation(
518
+ api_key=HF_TOKEN,
519
+ api_base=api_endpoint,
520
+ message=req.message,
521
+ system_prompt=req.system_prompt,
522
+ model_name=model_name,
523
+ chat_history=req.history,
524
+ temperature=req.temperature,
525
+ max_new_tokens=req.max_new_tokens,
526
+ deep_search=req.enable_browsing,
527
+ )
528
+ response = "".join(list(stream))
529
+ return {"response": response}
530
+
531
+ @app.post("/api/code")
532
+ async def code_endpoint(req: dict):
533
+ framework = req.get("framework")
534
+ task = req.get("task")
535
+ code = req.get("code", "")
536
+ prompt = f"Generate code for task: {task} using {framework}. Existing code: {code}"
537
+ model_name, api_endpoint = select_model(prompt)
538
+ response = "".join(list(request_generation(
539
+ api_key=HF_TOKEN,
540
+ api_base=api_endpoint,
541
+ message=prompt,
542
+ system_prompt="You are a coding expert.",
543
+ model_name=model_name,
544
+ temperature=0.7,
545
+ max_new_tokens=128000,
546
+ )))
547
+ return {"generated_code": response}
548
+
549
+ @app.post("/api/analysis")
550
+ async def analysis_endpoint(req: dict):
551
+ message = req.get("text", "")
552
+ model_name, api_endpoint = select_model(message)
553
+ response = "".join(list(request_generation(
554
+ api_key=HF_TOKEN,
555
+ api_base=api_endpoint,
556
+ message=message,
557
+ system_prompt="You are an expert analyst. Provide detailed analysis with step-by-step reasoning.",
558
+ model_name=model_name,
559
+ temperature=0.7,
560
+ max_new_tokens=128000,
561
+ )))
562
+ return {"analysis": response}
563
+
564
+ @app.get("/api/test-model")
565
+ async def test_model(model: str = MODEL_NAME, endpoint: str = API_ENDPOINT):
566
+ try:
567
+ client = OpenAI(api_key=HF_TOKEN, base_url=endpoint, timeout=60.0)
568
+ response = client.chat.completions.create(
569
+ model=model,
570
+ messages=[{"role": "user", "content": "Test"}],
571
+ max_tokens=50
572
+ )
573
+ return {"status": "success", "response": response.choices[0].message.content}
574
+ except Exception as e:
575
+ return {"status": "error", "message": str(e)}
576
+
577
  # تشغيل الخادم
578
  if __name__ == "__main__":
579
  import uvicorn