MGZON committed on
Commit
6346369
·
1 Parent(s): 27444a4

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +156 -30
main.py CHANGED
@@ -9,6 +9,17 @@ import requests
9
  from bs4 import BeautifulSoup
10
  import re
11
  from tenacity import retry, stop_after_attempt, wait_exponential
 
 
 
 
 
 
 
 
 
 
 
12
 
13
  # تعريف LATEX_DELIMS
14
  LATEX_DELIMS = [
@@ -22,7 +33,7 @@ LATEX_DELIMS = [
22
  logging.basicConfig(level=logging.INFO)
23
  logger = logging.getLogger(__name__)
24
 
25
- # تحقق من الملفات في /app/ (للتصحيح)
26
  logger.info("Files in /app/: %s", os.listdir("/app"))
27
 
28
  # إعداد العميل لـ Hugging Face Inference API
@@ -30,7 +41,9 @@ HF_TOKEN = os.getenv("HF_TOKEN")
30
  API_ENDPOINT = os.getenv("API_ENDPOINT", "https://router.huggingface.co/v1")
31
  FALLBACK_API_ENDPOINT = "https://api-inference.huggingface.co/v1"
32
  MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-20b:fireworks-ai")
33
- SECONDARY_MODEL_NAME = os.getenv("SECONDARY_MODEL_NAME", "mistralai/Mixtral-8x7B-Instruct-v0.1")
 
 
34
  if not HF_TOKEN:
35
  logger.error("HF_TOKEN is not set in environment variables.")
36
  raise ValueError("HF_TOKEN is required for Inference API.")
@@ -39,6 +52,20 @@ if not HF_TOKEN:
39
  QUEUE_SIZE = int(os.getenv("QUEUE_SIZE", 80))
40
  CONCURRENCY_LIMIT = int(os.getenv("CONCURRENCY_LIMIT", 20))
41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  # دالة بحث ويب محسنة
43
  def web_search(query: str) -> str:
44
  try:
@@ -46,16 +73,14 @@ def web_search(query: str) -> str:
46
  google_cse_id = os.getenv("GOOGLE_CSE_ID")
47
  if not google_api_key or not google_cse_id:
48
  return "Web search requires GOOGLE_API_KEY and GOOGLE_CSE_ID to be set."
49
-
50
- url = f"https://www.googleapis.com/customsearch/v1?key={google_api_key}&cx={google_cse_id}&q={query}"
51
- response = requests.get(url)
52
  response.raise_for_status()
53
  results = response.json().get("items", [])
54
  if not results:
55
  return "No web results found."
56
-
57
  search_results = []
58
- for i, item in enumerate(results[:3]):
59
  title = item.get("title", "")
60
  snippet = item.get("snippet", "")
61
  link = item.get("link", "")
@@ -64,18 +89,17 @@ def web_search(query: str) -> str:
64
  page_response.raise_for_status()
65
  soup = BeautifulSoup(page_response.text, "html.parser")
66
  paragraphs = soup.find_all("p")
67
- page_content = " ".join([p.get_text() for p in paragraphs][:500])
68
  except Exception as e:
69
  logger.warning(f"Failed to fetch page content for {link}: {e}")
70
  page_content = snippet
71
  search_results.append(f"Result {i+1}:\nTitle: {title}\nLink: {link}\nContent: {page_content}\n")
72
-
73
  return "\n".join(search_results)
74
  except Exception as e:
75
  logger.exception("Web search failed")
76
  return f"Web search error: {e}"
77
 
78
- # دالة request_generation مع retry
79
  @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
80
  def request_generation(
81
  api_key: str,
@@ -85,15 +109,13 @@ def request_generation(
85
  model_name: str,
86
  chat_history: Optional[List[dict]] = None,
87
  temperature: float = 0.9,
88
- max_new_tokens: int = 4096, # زيادة لحد 4096
89
  reasoning_effort: str = "off",
90
  tools: Optional[List[dict]] = None,
91
  tool_choice: Optional[str] = None,
92
  deep_search: bool = False,
93
  ) -> Generator[str, None, None]:
94
- """Streams Responses API events."""
95
- client = OpenAI(api_key=api_key, base_url=api_base)
96
-
97
  task_type = "general"
98
  if "code" in message.lower() or "programming" in message.lower() or any(ext in message.lower() for ext in ["python", "javascript", "react", "django", "flask"]):
99
  task_type = "code"
@@ -108,10 +130,9 @@ def request_generation(
108
  task_type = "publish"
109
  enhanced_system_prompt = f"{system_prompt}\nPrepare content for publishing, ensuring clarity, professionalism, and adherence to best practices."
110
  else:
111
- enhanced_system_prompt = f"{system_prompt}\nPlease provide detailed and comprehensive responses, including explanations, examples, and relevant details where applicable."
112
 
113
  logger.info(f"Task type detected: {task_type}")
114
-
115
  input_messages: List[dict] = [{"role": "system", "content": enhanced_system_prompt}]
116
  if chat_history:
117
  for msg in chat_history:
@@ -163,7 +184,7 @@ def request_generation(
163
  saw_visible_output = True
164
  buffer += content
165
 
166
- if "\n" in buffer or len(buffer) > 500: # زيادة الـ buffer لحد 500
167
  yield buffer
168
  buffer = ""
169
  continue
@@ -211,7 +232,7 @@ def request_generation(
211
  fallback_endpoint = FALLBACK_API_ENDPOINT
212
  logger.info(f"Retrying with fallback model: {fallback_model} on {fallback_endpoint}")
213
  try:
214
- client = OpenAI(api_key=api_key, base_url=fallback_endpoint)
215
  stream = client.chat.completions.create(
216
  model=fallback_model,
217
  messages=input_messages,
@@ -238,7 +259,7 @@ def request_generation(
238
  saw_visible_output = True
239
  buffer += content
240
 
241
- if "\n" in buffer or len(buffer) > 500:
242
  yield buffer
243
  buffer = ""
244
  continue
@@ -264,12 +285,46 @@ def request_generation(
264
  except Exception as e2:
265
  logger.exception(f"[Gateway] Streaming failed for fallback model {fallback_model}: {e2}")
266
  yield f"Error: Failed to load both models ({model_name} and {fallback_model}): {e2}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
267
  else:
268
  yield f"Error: Failed to load model {model_name}: {e}"
269
 
270
  # وظيفة التنسيق النهائي
271
  def format_final(analysis_text: str, visible_text: str) -> str:
272
- """Render final message with collapsible analysis + normal Markdown answer."""
273
  reasoning_safe = html.escape((analysis_text or "").strip())
274
  response = (visible_text or "").strip()
275
  return (
@@ -287,7 +342,7 @@ def generate(message, history, system_prompt, temperature, reasoning_effort, ena
287
  yield "Please enter a prompt."
288
  return
289
 
290
- # Flatten gradio history وتنظيف metadata
291
  chat_history = []
292
  for h in history:
293
  if isinstance(h, dict):
@@ -299,7 +354,6 @@ def generate(message, history, system_prompt, temperature, reasoning_effort, ena
299
  if u: chat_history.append({"role": "user", "content": u})
300
  if a: chat_history.append({"role": "assistant", "content": a})
301
 
302
- # إعداد الأدوات
303
  tools = [
304
  {
305
  "type": "function",
@@ -329,8 +383,8 @@ def generate(message, history, system_prompt, temperature, reasoning_effort, ena
329
  },
330
  },
331
  }
332
- ] if "gpt-oss" in MODEL_NAME else []
333
- tool_choice = "auto" if "gpt-oss" in MODEL_NAME else "none"
334
 
335
  in_analysis = False
336
  in_visible = False
@@ -352,10 +406,10 @@ def generate(message, history, system_prompt, temperature, reasoning_effort, ena
352
  try:
353
  stream = request_generation(
354
  api_key=HF_TOKEN,
355
- api_base=API_ENDPOINT,
356
  message=message,
357
  system_prompt=system_prompt,
358
- model_name=MODEL_NAME,
359
  chat_history=chat_history,
360
  temperature=temperature,
361
  max_new_tokens=max_new_tokens,
@@ -422,7 +476,7 @@ chatbot_ui = gr.ChatInterface(
422
  gr.Slider(label="Temperature", minimum=0.0, maximum=1.0, step=0.1, value=0.9),
423
  gr.Radio(label="Reasoning Effort", choices=["low", "medium", "high"], value="medium"),
424
  gr.Checkbox(label="Enable DeepSearch (web browsing)", value=True),
425
- gr.Slider(label="Max New Tokens", minimum=50, maximum=4096, step=50, value=4096),
426
  ],
427
  stop_btn="Stop",
428
  examples=[
@@ -434,6 +488,7 @@ chatbot_ui = gr.ChatInterface(
434
  ["Create a Flask route for user authentication."],
435
  ["What are the latest trends in AI?"],
436
  ["Provide guidelines for publishing a technical blog post."],
 
437
  ],
438
  title="MGZon Chatbot",
439
  description="A versatile chatbot powered by GPT-OSS-20B and a fine-tuned model for MGZon queries. Supports code generation, analysis, review, web search, and MGZon-specific queries. Licensed under Apache 2.0. ***DISCLAIMER:*** Analysis may contain internal thoughts not suitable for final response.",
@@ -442,12 +497,83 @@ chatbot_ui = gr.ChatInterface(
442
  )
443
 
444
  # دمج FastAPI مع Gradio
445
- from fastapi import FastAPI
446
- from gradio import mount_gradio_app
447
-
448
  app = FastAPI(title="MGZon Chatbot API")
449
  app = mount_gradio_app(app, chatbot_ui, path="/")
450
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
451
  # تشغيل الخادم
452
  if __name__ == "__main__":
453
  import uvicorn
 
9
  from bs4 import BeautifulSoup
10
  import re
11
  from tenacity import retry, stop_after_attempt, wait_exponential
12
+ from fastapi import FastAPI
13
+ from pydantic import BaseModel
14
+
15
# Request schema for the JSON chat API endpoint.
class QueryRequest(BaseModel):
    # The user prompt to answer.
    message: str
    # System prompt prepended to the conversation.
    system_prompt: str = "You are a helpful assistant capable of code generation, analysis, review, and more."
    # Prior turns as a list of {"role", "content"} dicts, if any.
    history: Optional[List[dict]] = None
    # Sampling temperature forwarded to the inference call.
    temperature: float = 0.9
    # Upper bound on generated tokens forwarded to the inference call.
    max_new_tokens: int = 128000
    # When True, enables the DeepSearch (web browsing) path.
    enable_browsing: bool = False
 
24
  # تعريف LATEX_DELIMS
25
  LATEX_DELIMS = [
 
33
  logging.basicConfig(level=logging.INFO)
34
  logger = logging.getLogger(__name__)
35
 
36
+ # تحقق من الملفات في /app/
37
  logger.info("Files in /app/: %s", os.listdir("/app"))
38
 
39
  # إعداد العميل لـ Hugging Face Inference API
 
41
  API_ENDPOINT = os.getenv("API_ENDPOINT", "https://router.huggingface.co/v1")
42
  FALLBACK_API_ENDPOINT = "https://api-inference.huggingface.co/v1"
43
  MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-20b:fireworks-ai")
44
+ SECONDARY_MODEL_NAME = os.getenv("SECONDARY_MODEL_NAME", "MGZON/Veltrix")
45
+ TERTIARY_MODEL_NAME = os.getenv("TERTIARY_MODEL_NAME", "mistralai/Mixtral-8x7B-Instruct-v0.1")
46
+
47
  if not HF_TOKEN:
48
  logger.error("HF_TOKEN is not set in environment variables.")
49
  raise ValueError("HF_TOKEN is required for Inference API.")
 
52
  QUEUE_SIZE = int(os.getenv("QUEUE_SIZE", 80))
53
  CONCURRENCY_LIMIT = int(os.getenv("CONCURRENCY_LIMIT", 20))
54
 
55
# Model-routing helper
def select_model(query: str) -> tuple[str, str]:
    """Pick the (model_name, api_endpoint) pair for a query.

    MGZon/OAuth-related queries are routed to the fine-tuned secondary
    model on the fallback endpoint; everything else goes to the primary
    model on the main endpoint.
    """
    query_lower = query.lower()
    # One compiled alternation instead of looping over separate patterns.
    # The original second pattern (r"\bmgzon\s+(products|...)\b") was fully
    # subsumed by r"\bmgzon\b", so dropping it does not change which
    # queries match. IGNORECASE is kept for the Latin "mgzon"/"oauth"
    # tokens even though the query is pre-lowercased.
    mgzon_re = re.compile(
        r"\bmgzon\b|\bميزات\s+mgzon\b|\bخدمات\s+mgzon\b|\boauth\b",
        re.IGNORECASE,
    )
    if mgzon_re.search(query_lower):
        logger.info(f"Selected {SECONDARY_MODEL_NAME} with endpoint {FALLBACK_API_ENDPOINT} for MGZon-related query: {query}")
        return SECONDARY_MODEL_NAME, FALLBACK_API_ENDPOINT
    logger.info(f"Selected {MODEL_NAME} with endpoint {API_ENDPOINT} for general query: {query}")
    return MODEL_NAME, API_ENDPOINT
68
+
69
  # دالة بحث ويب محسنة
70
  def web_search(query: str) -> str:
71
  try:
 
73
  google_cse_id = os.getenv("GOOGLE_CSE_ID")
74
  if not google_api_key or not google_cse_id:
75
  return "Web search requires GOOGLE_API_KEY and GOOGLE_CSE_ID to be set."
76
+ url = f"https://www.googleapis.com/customsearch/v1?key={google_api_key}&cx={google_cse_id}&q={query}+site:mgzon.com"
77
+ response = requests.get(url, timeout=10)
 
78
  response.raise_for_status()
79
  results = response.json().get("items", [])
80
  if not results:
81
  return "No web results found."
 
82
  search_results = []
83
+ for i, item in enumerate(results[:5]):
84
  title = item.get("title", "")
85
  snippet = item.get("snippet", "")
86
  link = item.get("link", "")
 
89
  page_response.raise_for_status()
90
  soup = BeautifulSoup(page_response.text, "html.parser")
91
  paragraphs = soup.find_all("p")
92
+ page_content = " ".join([p.get_text() for p in paragraphs][:1000])
93
  except Exception as e:
94
  logger.warning(f"Failed to fetch page content for {link}: {e}")
95
  page_content = snippet
96
  search_results.append(f"Result {i+1}:\nTitle: {title}\nLink: {link}\nContent: {page_content}\n")
 
97
  return "\n".join(search_results)
98
  except Exception as e:
99
  logger.exception("Web search failed")
100
  return f"Web search error: {e}"
101
 
102
+ # دالة request_generation
103
  @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
104
  def request_generation(
105
  api_key: str,
 
109
  model_name: str,
110
  chat_history: Optional[List[dict]] = None,
111
  temperature: float = 0.9,
112
+ max_new_tokens: int = 128000,
113
  reasoning_effort: str = "off",
114
  tools: Optional[List[dict]] = None,
115
  tool_choice: Optional[str] = None,
116
  deep_search: bool = False,
117
  ) -> Generator[str, None, None]:
118
+ client = OpenAI(api_key=api_key, base_url=api_base, timeout=60.0)
 
 
119
  task_type = "general"
120
  if "code" in message.lower() or "programming" in message.lower() or any(ext in message.lower() for ext in ["python", "javascript", "react", "django", "flask"]):
121
  task_type = "code"
 
130
  task_type = "publish"
131
  enhanced_system_prompt = f"{system_prompt}\nPrepare content for publishing, ensuring clarity, professionalism, and adherence to best practices."
132
  else:
133
+ enhanced_system_prompt = system_prompt
134
 
135
  logger.info(f"Task type detected: {task_type}")
 
136
  input_messages: List[dict] = [{"role": "system", "content": enhanced_system_prompt}]
137
  if chat_history:
138
  for msg in chat_history:
 
184
  saw_visible_output = True
185
  buffer += content
186
 
187
+ if "\n" in buffer or len(buffer) > 2000:
188
  yield buffer
189
  buffer = ""
190
  continue
 
232
  fallback_endpoint = FALLBACK_API_ENDPOINT
233
  logger.info(f"Retrying with fallback model: {fallback_model} on {fallback_endpoint}")
234
  try:
235
+ client = OpenAI(api_key=api_key, base_url=fallback_endpoint, timeout=60.0)
236
  stream = client.chat.completions.create(
237
  model=fallback_model,
238
  messages=input_messages,
 
259
  saw_visible_output = True
260
  buffer += content
261
 
262
+ if "\n" in buffer or len(buffer) > 2000:
263
  yield buffer
264
  buffer = ""
265
  continue
 
285
  except Exception as e2:
286
  logger.exception(f"[Gateway] Streaming failed for fallback model {fallback_model}: {e2}")
287
  yield f"Error: Failed to load both models ({model_name} and {fallback_model}): {e2}"
288
+ # تجربة النموذج الثالث
289
+ try:
290
+ client = OpenAI(api_key=api_key, base_url=FALLBACK_API_ENDPOINT, timeout=60.0)
291
+ stream = client.chat.completions.create(
292
+ model=TERTIARY_MODEL_NAME,
293
+ messages=input_messages,
294
+ temperature=temperature,
295
+ max_tokens=max_new_tokens,
296
+ stream=True,
297
+ tools=[],
298
+ tool_choice="none",
299
+ )
300
+ for chunk in stream:
301
+ if chunk.choices[0].delta.content:
302
+ content = chunk.choices[0].delta.content
303
+ saw_visible_output = True
304
+ buffer += content
305
+ if "\n" in buffer or len(buffer) > 2000:
306
+ yield buffer
307
+ buffer = ""
308
+ continue
309
+ if chunk.choices[0].finish_reason in ("stop", "error"):
310
+ if buffer:
311
+ yield buffer
312
+ buffer = ""
313
+ if not saw_visible_output:
314
+ yield "No visible output produced."
315
+ if chunk.choices[0].finish_reason == "error":
316
+ yield f"Error: Unknown error with tertiary model {TERTIARY_MODEL_NAME}"
317
+ break
318
+ if buffer:
319
+ yield buffer
320
+ except Exception as e3:
321
+ logger.exception(f"[Gateway] Streaming failed for tertiary model {TERTIARY_MODEL_NAME}: {e3}")
322
+ yield f"Error: Failed to load all models: {e3}"
323
  else:
324
  yield f"Error: Failed to load model {model_name}: {e}"
325
 
326
  # وظيفة التنسيق النهائي
327
  def format_final(analysis_text: str, visible_text: str) -> str:
 
328
  reasoning_safe = html.escape((analysis_text or "").strip())
329
  response = (visible_text or "").strip()
330
  return (
 
342
  yield "Please enter a prompt."
343
  return
344
 
345
+ model_name, api_endpoint = select_model(message)
346
  chat_history = []
347
  for h in history:
348
  if isinstance(h, dict):
 
354
  if u: chat_history.append({"role": "user", "content": u})
355
  if a: chat_history.append({"role": "assistant", "content": a})
356
 
 
357
  tools = [
358
  {
359
  "type": "function",
 
383
  },
384
  },
385
  }
386
+ ] if "gpt-oss" in model_name else []
387
+ tool_choice = "auto" if "gpt-oss" in model_name else "none"
388
 
389
  in_analysis = False
390
  in_visible = False
 
406
  try:
407
  stream = request_generation(
408
  api_key=HF_TOKEN,
409
+ api_base=api_endpoint,
410
  message=message,
411
  system_prompt=system_prompt,
412
+ model_name=model_name,
413
  chat_history=chat_history,
414
  temperature=temperature,
415
  max_new_tokens=max_new_tokens,
 
476
  gr.Slider(label="Temperature", minimum=0.0, maximum=1.0, step=0.1, value=0.9),
477
  gr.Radio(label="Reasoning Effort", choices=["low", "medium", "high"], value="medium"),
478
  gr.Checkbox(label="Enable DeepSearch (web browsing)", value=True),
479
+ gr.Slider(label="Max New Tokens", minimum=50, maximum=128000, step=50, value=4096),
480
  ],
481
  stop_btn="Stop",
482
  examples=[
 
488
  ["Create a Flask route for user authentication."],
489
  ["What are the latest trends in AI?"],
490
  ["Provide guidelines for publishing a technical blog post."],
491
+ ["Who is the founder of MGZon?"],
492
  ],
493
  title="MGZon Chatbot",
494
  description="A versatile chatbot powered by GPT-OSS-20B and a fine-tuned model for MGZon queries. Supports code generation, analysis, review, web search, and MGZon-specific queries. Licensed under Apache 2.0. ***DISCLAIMER:*** Analysis may contain internal thoughts not suitable for final response.",
 
497
  )
498
 
499
  # دمج FastAPI مع Gradio
 
 
 
500
  app = FastAPI(title="MGZon Chatbot API")
501
  app = mount_gradio_app(app, chatbot_ui, path="/")
502
 
503
# API endpoints
@app.get("/api/model-info")
def model_info():
    """Report the configured model names, the primary API base, and status."""
    info = {
        "model_name": MODEL_NAME,
        "secondary_model": SECONDARY_MODEL_NAME,
        "tertiary_model": TERTIARY_MODEL_NAME,
        "api_base": API_ENDPOINT,
        "status": "online",
    }
    return info
513
+
514
+ @app.post("/api/chat")
515
+ async def chat_endpoint(req: QueryRequest):
516
+ model_name, api_endpoint = select_model(req.message)
517
+ stream = request_generation(
518
+ api_key=HF_TOKEN,
519
+ api_base=api_endpoint,
520
+ message=req.message,
521
+ system_prompt=req.system_prompt,
522
+ model_name=model_name,
523
+ chat_history=req.history,
524
+ temperature=req.temperature,
525
+ max_new_tokens=req.max_new_tokens,
526
+ deep_search=req.enable_browsing,
527
+ )
528
+ response = "".join(list(stream))
529
+ return {"response": response}
530
+
531
+ @app.post("/api/code")
532
+ async def code_endpoint(req: dict):
533
+ framework = req.get("framework")
534
+ task = req.get("task")
535
+ code = req.get("code", "")
536
+ prompt = f"Generate code for task: {task} using {framework}. Existing code: {code}"
537
+ model_name, api_endpoint = select_model(prompt)
538
+ response = "".join(list(request_generation(
539
+ api_key=HF_TOKEN,
540
+ api_base=api_endpoint,
541
+ message=prompt,
542
+ system_prompt="You are a coding expert.",
543
+ model_name=model_name,
544
+ temperature=0.7,
545
+ max_new_tokens=128000,
546
+ )))
547
+ return {"generated_code": response}
548
+
549
+ @app.post("/api/analysis")
550
+ async def analysis_endpoint(req: dict):
551
+ message = req.get("text", "")
552
+ model_name, api_endpoint = select_model(message)
553
+ response = "".join(list(request_generation(
554
+ api_key=HF_TOKEN,
555
+ api_base=api_endpoint,
556
+ message=message,
557
+ system_prompt="You are an expert analyst. Provide detailed analysis with step-by-step reasoning.",
558
+ model_name=model_name,
559
+ temperature=0.7,
560
+ max_new_tokens=128000,
561
+ )))
562
+ return {"analysis": response}
563
+
564
+ @app.get("/api/test-model")
565
+ async def test_model(model: str = MODEL_NAME, endpoint: str = API_ENDPOINT):
566
+ try:
567
+ client = OpenAI(api_key=HF_TOKEN, base_url=endpoint, timeout=60.0)
568
+ response = client.chat.completions.create(
569
+ model=model,
570
+ messages=[{"role": "user", "content": "Test"}],
571
+ max_tokens=50
572
+ )
573
+ return {"status": "success", "response": response.choices[0].message.content}
574
+ except Exception as e:
575
+ return {"status": "error", "message": str(e)}
576
+
577
  # تشغيل الخادم
578
  if __name__ == "__main__":
579
  import uvicorn