Spaces:

bhavesh122
/

super_AI

Running

App Files Files Community

bhavesh122 committed on Apr 17

Commit

00a7a0c

verified ·

1 Parent(s): cfda0d3

Upload 20 files

Browse files

Files changed (3) hide show

huggingface_client.py +36 -1
main.py +48 -14
nvidia_client.py +8 -4

huggingface_client.py CHANGED Viewed

@@ -49,7 +49,7 @@ class HuggingFaceClient(AIClient):
             messages.append({"role": "system", "content": system_prompt})
         messages.append({"role": "user", "content": prompt})
-        url = f"https://api-inference.huggingface.co/models/{model_id}/v1/chat/completions"
         payload = {
             "model": model_id,
@@ -81,5 +81,40 @@ class HuggingFaceClient(AIClient):
         except Exception as e:
             yield f"Connection error: {str(e)}"
     async def close(self) -> None:
         await self.client.aclose()

             messages.append({"role": "system", "content": system_prompt})
         messages.append({"role": "user", "content": prompt})
+        url = "https://router.huggingface.co/v1/chat/completions"
         payload = {
             "model": model_id,
         except Exception as e:
             yield f"Connection error: {str(e)}"
+    async def text_to_image(
+        self,
+        model_id: str,
+        prompt: str,
+        provider: Optional[str] = "fal-ai"
+    ) -> Optional[str]:
+        """Generate an image via the HF Inference API and return it base64-encoded.
+
+        Args:
+            model_id: Model repository id, interpolated into the request URL.
+            prompt: Text prompt, sent as the ``inputs`` field of the JSON body.
+            provider: Currently unused; kept so existing callers keep working.
+                NOTE(review): provider routing might need specific headers or
+                a different URL structure depending on HF's evolving API.
+
+        Returns:
+            The raw response body encoded as a base64 UTF-8 string when the
+            API answers with HTTP 200; ``None`` on any other status code or
+            when the request raises.
+        """
+        import base64
+        token = self._get_token()
+        headers = {
+            "Authorization": f"Bearer {token}",
+            "Content-Type": "application/json",
+            "x-use-cache": "false"
+        }
+        # NOTE(review): the chat endpoint in this file uses router.huggingface.co;
+        # confirm whether this image endpoint should move there as well.
+        url = f"https://api-inference.huggingface.co/models/{model_id}"
+        payload = {"inputs": prompt}
+        try:
+            # self.client looks like an httpx.AsyncClient (status_code / aclose),
+            # whose post() is a coroutine and not an async context manager, so
+            # it must be awaited. Wrapping it in `async with` raises TypeError,
+            # which the broad except below would swallow, making every call
+            # return None.
+            response = await self.client.post(url, headers=headers, json=payload)
+        except Exception as e:
+            print(f"HF Image Exception: {str(e)}")
+            return None
+        if response.status_code != 200:
+            print(f"HF Image Error: {response.status_code} - {response.content}")
+            return None
+        # A non-streaming response is already fully read; content holds the bytes.
+        return base64.b64encode(response.content).decode('utf-8')
     async def close(self) -> None:
         """Close the underlying client by awaiting ``self.client.aclose()``."""
         await self.client.aclose()

main.py CHANGED Viewed

@@ -243,7 +243,7 @@ _NV_MODELS = {
 # --- GitHub model IDs (for consolidated OpenAI brand) ---
 _GH_MODELS = {
-    "gpt-4o", "gpt-4o-mini"
 }
 # Global HTTP client
@@ -388,7 +388,8 @@ async def stream_chat(request: MultiChatRequest):
         "2. Break your answer into clear, numbered parts (1., 2., 3., etc.).\n"
         "3. Within each part, use bold markers for terms (e.g., '- **Key Term**: Definition').\n"
         "4. Never mention your underlying provider or model (GPT, Llama, NVIDIA, etc.).\n"
-        "5. Keep the tone educational and highly structured as seen in professional AI documentation."
     )
     system_modifiers = {
@@ -412,6 +413,7 @@ async def stream_chat(request: MultiChatRequest):
         final_prompt = f"System Instruction: {system_prompt}\n\nUser Question: {request.prompt}"
     async def event_generator():
         try:
             # --- ULTIMATE 11-MODEL POWERHOUSE REGISTRY ---
             MODEL_MAP = {
@@ -419,32 +421,38 @@ async def stream_chat(request: MultiChatRequest):
                 "openai/gpt-4o": ("gpt-4o", "github"),
                 "meta-llama/llama-4-scout": ("meta-llama/llama-4-scout-17b-16e-instruct", "groq"),
                 "qwen/qwen3-32b": ("qwen/qwen3-32b", "groq"),
-                "moonshot/kimi-k2": ("moonshotai/kimi-k2-instruct", "groq"),
                 "nvidia/phi-4": ("microsoft/phi-4-multimodal-instruct", "nvidia"),
                 "nvidia/glm4-7": ("z-ai/glm4.7", "nvidia"),
-                "nvidia/deepseek-v32": ("deepseek-ai/deepseek-v3.2", "nvidia"),
                 "google/gemini-3.1-flash-lite": ("gemini-3.1-flash-lite-preview", "google"),
                 "arcee/trinity-large": ("arcee-ai/trinity-large-preview:free", "openrouter"),
                 "minimax/minimax-m2.5": ("minimax/minimax-m2.5:free", "openrouter"),
                 "liquid/lfm-2.5": ("liquid/lfm-2.5-1.2b-instruct:free", "openrouter"),
-                "xai/mistral-small": ("command-r-08-2024", "github")
             }
             # --- BRAND IDENTITY MAPPING ---
             BRAND_NAME_MAP = {
                 "openai/gpt-4o-mini": "GPT-4o Mini",
-                "openai/gpt-4o": "GPT-4o Pro",
                 "meta-llama/llama-4-scout": "Llama 4 Scout",
                 "qwen/qwen3-32b": "Qwen 3-32B",
                 "moonshot/kimi-k2": "Kimi K2",
                 "nvidia/phi-4": "Phi-4 Multimodal",
                 "nvidia/glm4-7": "GLM 4.7 Reasoning",
-                "nvidia/deepseek-v32": "DeepSeek V3.2",
                 "google/gemini-3.1-flash-lite": "Gemini 3.1 Flash Lite",
                 "arcee/trinity-large": "Trinity Large",
                 "minimax/minimax-m2.5": "Minimax M2.5",
                 "liquid/lfm-2.5": "Liquid LFM",
-                "xai/mistral-small": "Cohere Command R"
             }
             print(f"DEBUG: target_model = '{target_model}'")
@@ -530,9 +538,9 @@ async def stream_chat(request: MultiChatRequest):
                         headers=headers) as response:
                         if response.status_code != 200:
                             err_body = await response.aread()
-                            print(f"❌ [Provider {provider} Error]: {response.status_code} - {err_body.decode()}")
-                            yield f"data: {json.dumps({'error': f'Provider {provider} Error: {err_body.decode()}'})}\n\n"
-                            return
                         # Tracking generated tokens (approximation)
                         token_increment = 0
@@ -545,9 +553,17 @@ async def stream_chat(request: MultiChatRequest):
                                 if data_str == "[DONE]": break
                                 try:
                                     data_json = json.loads(data_str)
-                                    chunk = data_json["choices"][0].get("delta", {}).get("content", "")
                                     if chunk:
                                         token_increment += 1
                                         yield f"data: {json.dumps({'chunk': chunk})}\n\n"
                                 except: pass
@@ -566,7 +582,9 @@ async def stream_chat(request: MultiChatRequest):
                                     if line.startswith("data:") and "[DONE]" not in line:
                                         try:
                                             chunk = json.loads(line[5:])["choices"][0].get("delta", {}).get("content", "")
-                                            if chunk: yield f"data: {json.dumps({'chunk': chunk})}\n\n"
                                         except: pass
             elif provider == "local_proxy":
                 # Special local proxy for Node-based free models (DDG/G4F/FCM)
@@ -579,10 +597,12 @@ async def stream_chat(request: MultiChatRequest):
                     ) as response:
                         async for line in response.aiter_lines():
                             if line.startswith("data: "):
                                 yield f"{line}\n\n"
                 return
             elif provider == "nvidia":
                 async for chunk in nv_client.async_stream_request(model_to_use, request.prompt, request.system_prompt):
                     yield f"data: {json.dumps({'chunk': chunk})}\n\n"
             elif provider == "github":
                 # Check for dynamic tokens (Mistral Small)
@@ -591,12 +611,15 @@ async def stream_chat(request: MultiChatRequest):
                     specific_token = get_api_key_rotated("GITHUB_TOKEN", 2)
                 async for chunk in gh_client.async_stream_request(model_to_use, request.prompt, request.system_prompt, api_key=specific_token):
                     yield f"data: {json.dumps({'chunk': chunk})}\n\n"
             elif provider == "hf":
                 async for chunk in hf_client.async_stream_request(model_to_use, request.prompt, request.system_prompt):
                     yield f"data: {json.dumps({'chunk': chunk})}\n\n"
             elif provider == "google":
                 async for chunk in goog_client.async_stream_request(model_to_use, request.prompt, request.system_prompt):
                     yield f"data: {json.dumps({'chunk': chunk})}\n\n"
             else:
                 yield f"data: {json.dumps({'error': f'Unknown provider {provider}'})}\n\n"
@@ -608,6 +631,16 @@ async def stream_chat(request: MultiChatRequest):
             for word in fallback_text.split(" "):
                 yield f"data: {json.dumps({'chunk': word + ' '})}\n\n"
                 await asyncio.sleep(0.01)
     headers = {
         "X-Accel-Buffering": "no",
         "Cache-Control": "no-cache",
@@ -1223,7 +1256,8 @@ async def image_generate(request: ImageGenerateRequest):
         "black-forest-labs/FLUX.1-schnell": ("huggingface", "black-forest-labs/FLUX.1-schnell"),
         "Qwen/Qwen-Image": ("huggingface", "Qwen/Qwen-Image-2512"),
         "tencent/HunyuanImage-3.0": ("huggingface", "tencent/HunyuanImage-3.0"),
-        "ByteDance/SDXL-Lightning": ("huggingface", "ByteDance/SDXL-Lightning")
     }
     provider, target_model = model_map.get(request.model_id, ("pollinations", "flux"))

 # --- GitHub model IDs (for consolidated OpenAI brand) ---
 _GH_MODELS = {
+    "openai/gpt-4o", "gpt-4o-mini"
 }
 # Global HTTP client
         "2. Break your answer into clear, numbered parts (1., 2., 3., etc.).\n"
         "3. Within each part, use bold markers for terms (e.g., '- **Key Term**: Definition').\n"
         "4. Never mention your underlying provider or model (GPT, Llama, NVIDIA, etc.).\n"
+        "5. Keep the tone educational and highly structured as seen in professional AI documentation.\n"
+        "6. If you are reasoning or thinking, wrap your thoughts in <thought>...</thought> tags."
     )
     system_modifiers = {
         final_prompt = f"System Instruction: {system_prompt}\n\nUser Question: {request.prompt}"
     async def event_generator():
+        yield_count = 0
         try:
             # --- ULTIMATE 11-MODEL POWERHOUSE REGISTRY ---
             MODEL_MAP = {
                 "openai/gpt-4o": ("gpt-4o", "github"),
                 "meta-llama/llama-4-scout": ("meta-llama/llama-4-scout-17b-16e-instruct", "groq"),
                 "qwen/qwen3-32b": ("qwen/qwen3-32b", "groq"),
+                "moonshot/kimi-k2": ("moonshotai/kimi-k2-instruct-0905", "groq"),
                 "nvidia/phi-4": ("microsoft/phi-4-multimodal-instruct", "nvidia"),
                 "nvidia/glm4-7": ("z-ai/glm4.7", "nvidia"),
+                "nvidia/deepseek-v32": ("deepseek-ai/deepseek-v3", "nvidia"),
                 "google/gemini-3.1-flash-lite": ("gemini-3.1-flash-lite-preview", "google"),
                 "arcee/trinity-large": ("arcee-ai/trinity-large-preview:free", "openrouter"),
                 "minimax/minimax-m2.5": ("minimax/minimax-m2.5:free", "openrouter"),
                 "liquid/lfm-2.5": ("liquid/lfm-2.5-1.2b-instruct:free", "openrouter"),
+                "nvidia/mistral-large-3": ("mistralai/mistral-large-3-675b-instruct-2512", "nvidia"),
+                "openrouter/nemotron-3-super": ("nvidia/nemotron-3-super-120b-a12b:free", "openrouter"),
+                "huggingface/minimax-m2.7": ("MiniMaxAI/MiniMax-M2.7:together", "huggingface"),
+                "huggingface/glm-5.1": ("zai-org/GLM-5.1:together", "huggingface")
             }
             # --- BRAND IDENTITY MAPPING ---
             BRAND_NAME_MAP = {
                 "openai/gpt-4o-mini": "GPT-4o Mini",
+                "openai/gpt-4o": "GPT-5 Mini",
                 "meta-llama/llama-4-scout": "Llama 4 Scout",
                 "qwen/qwen3-32b": "Qwen 3-32B",
                 "moonshot/kimi-k2": "Kimi K2",
                 "nvidia/phi-4": "Phi-4 Multimodal",
                 "nvidia/glm4-7": "GLM 4.7 Reasoning",
+                "nvidia/deepseek-v32": "DeepSeek V3",
                 "google/gemini-3.1-flash-lite": "Gemini 3.1 Flash Lite",
                 "arcee/trinity-large": "Trinity Large",
                 "minimax/minimax-m2.5": "Minimax M2.5",
                 "liquid/lfm-2.5": "Liquid LFM",
+                "nvidia/mistral-large-3": "Mistral Large 3",
+                "openrouter/nemotron-3-super": "Nemotron-3 Super",
+                "huggingface/minimax-m2.7": "MiniMax M2.7",
+                "huggingface/glm-5.1": "GLM 5.1 Reasoning"
             }
             print(f"DEBUG: target_model = '{target_model}'")
                         headers=headers) as response:
                         if response.status_code != 200:
                             err_body = await response.aread()
+                            error_msg = f"Provider {provider} Error: {response.status_code} - {err_body.decode()}"
+                            print(f"❌ {error_msg}")
+                            raise Exception(error_msg)
                         # Tracking generated tokens (approximation)
                         token_increment = 0
                                 if data_str == "[DONE]": break
                                 try:
                                     data_json = json.loads(data_str)
+                                    delta = data_json["choices"][0].get("delta", {})
+                                    # Support for Thinking/Reasoning (Groq Qwen/DeepSeek etc)
+                                    reasoning = delta.get("reasoning_content")
+                                    if reasoning:
+                                        yield f"data: {json.dumps({'chunk': f'<thought>{reasoning}</thought>'})}\n\n"
+                                    chunk = delta.get("content", "")
                                     if chunk:
                                         token_increment += 1
+                                        yield_count += 1
                                         yield f"data: {json.dumps({'chunk': chunk})}\n\n"
                                 except: pass
                                     if line.startswith("data:") and "[DONE]" not in line:
                                         try:
                                             chunk = json.loads(line[5:])["choices"][0].get("delta", {}).get("content", "")
+                                            if chunk:
+                                                yield_count += 1
+                                                yield f"data: {json.dumps({'chunk': chunk})}\n\n"
                                         except: pass
             elif provider == "local_proxy":
                 # Special local proxy for Node-based free models (DDG/G4F/FCM)
                     ) as response:
                         async for line in response.aiter_lines():
                             if line.startswith("data: "):
+                                yield_count += 1
                                 yield f"{line}\n\n"
                 return
             elif provider == "nvidia":
                 async for chunk in nv_client.async_stream_request(model_to_use, request.prompt, request.system_prompt):
+                    yield_count += 1
                     yield f"data: {json.dumps({'chunk': chunk})}\n\n"
             elif provider == "github":
                 # Check for dynamic tokens (Mistral Small)
                     specific_token = get_api_key_rotated("GITHUB_TOKEN", 2)
                 async for chunk in gh_client.async_stream_request(model_to_use, request.prompt, request.system_prompt, api_key=specific_token):
+                    yield_count += 1
                     yield f"data: {json.dumps({'chunk': chunk})}\n\n"
             elif provider == "hf":
                 async for chunk in hf_client.async_stream_request(model_to_use, request.prompt, request.system_prompt):
+                    yield_count += 1
                     yield f"data: {json.dumps({'chunk': chunk})}\n\n"
             elif provider == "google":
                 async for chunk in goog_client.async_stream_request(model_to_use, request.prompt, request.system_prompt):
+                    yield_count += 1
                     yield f"data: {json.dumps({'chunk': chunk})}\n\n"
             else:
                 yield f"data: {json.dumps({'error': f'Unknown provider {provider}'})}\n\n"
             for word in fallback_text.split(" "):
                 yield f"data: {json.dumps({'chunk': word + ' '})}\n\n"
                 await asyncio.sleep(0.01)
+            # --- REAL FALLBACK EXECUTION ---
+            try:
+                # Use GPT-4o Mini on GitHub as the ultimate reliable fallback
+                gh_client.api_key = get_api_key_rotated("GITHUB_API_KEY", 2, index=0)
+                async for chunk in gh_client.async_stream_request("gpt-4o-mini", request.prompt, request.system_prompt):
+                    yield f"data: {json.dumps({'chunk': chunk})}\n\n"
+            except Exception as fallback_err:
+                print(f"💀 [Critical Fallback Failed]: {fallback_err}")
+                yield f"data: {json.dumps({'error': 'All providers exhausted. Please try again later.'})}\n\n"
     headers = {
         "X-Accel-Buffering": "no",
         "Cache-Control": "no-cache",
         "black-forest-labs/FLUX.1-schnell": ("huggingface", "black-forest-labs/FLUX.1-schnell"),
         "Qwen/Qwen-Image": ("huggingface", "Qwen/Qwen-Image-2512"),
         "tencent/HunyuanImage-3.0": ("huggingface", "tencent/HunyuanImage-3.0"),
+        "ByteDance/SDXL-Lightning": ("huggingface", "ByteDance/SDXL-Lightning"),
+        "baidu/ERNIE-Image-Turbo": ("huggingface", "baidu/ERNIE-Image-Turbo")
     }
     provider, target_model = model_map.get(request.model_id, ("pollinations", "flux"))

nvidia_client.py CHANGED Viewed

@@ -12,7 +12,7 @@ class NvidiaClient(AIClient):
     def __init__(self):
         self.api_keys = []
         # Support multiple keys if available (NVIDIA_API_KEY, NVIDIA_API_KEY2, etc.)
-        for key in ["NVIDIA_API_KEY", "NVIDIA_API_KEY2", "NVIDIA_API_KEY3"]:
             val = os.getenv(key)
             if val:
                 self.api_keys.append(val.strip())
@@ -50,11 +50,15 @@ class NvidiaClient(AIClient):
             "model": model_id,
             "messages": messages,
             "stream": True,
-            "max_tokens": kwargs.get("max_tokens", 1024),
-            "temperature": kwargs.get("temperature", 0.5),
-            "top_p": 1.0
         }
         try:
             async with self.client.stream("POST", url, headers=headers, json=payload) as response:
                 if response.status_code != 200:

     def __init__(self):
         self.api_keys = []
         # Support multiple keys if available (NVIDIA_API_KEY, NVIDIA_API_KEY2, etc.)
+        for key in ["NVIDIA_API_KEY", "NVIDIA_API_KEY2", "NVIDIA_API_KEY3", "NVIDIA_API_KEY4"]:
             val = os.getenv(key)
             if val:
                 self.api_keys.append(val.strip())
             "model": model_id,
             "messages": messages,
             "stream": True,
+            "max_tokens": kwargs.get("max_tokens", 8192 if "deepseek" in model_id.lower() or "mistral-large-3" in model_id.lower() else 1024),
+            "temperature": kwargs.get("temperature", 0.15 if "mistral-large-3" in model_id.lower() else (0.2 if "deepseek" in model_id.lower() else 0.5)),
+            "top_p": 1.0 if "mistral-large-3" in model_id.lower() else (0.7 if "deepseek" in model_id.lower() else 1.0)
         }
+        # Handle DeepSeek thinking
+        if "deepseek" in model_id.lower():
+            payload["extra_body"] = {"chat_template_kwargs": {"thinking": True}}
         try:
             async with self.client.stream("POST", url, headers=headers, json=payload) as response:
                 if response.status_code != 200: