Gaston895 committed on
Commit
fb249da
Β·
verified Β·
1 Parent(s): 95fb302

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +86 -93
app.py CHANGED
@@ -11,7 +11,7 @@ import logging
11
  import os
12
  from dotenv import load_dotenv
13
  import random
14
- from huggingface_hub import InferenceClient
15
 
16
  # Load environment variables
17
  load_dotenv()
@@ -38,112 +38,97 @@ GLOBAL_REGIONS = [
38
  # HuggingFace Token for all providers
39
  HF_TOKEN = os.getenv('HF_TOKEN', '')
40
 
41
- # Initialize InferenceClient instances for DeepSeek models using new router endpoint
42
- inference_clients = []
43
  if HF_TOKEN:
44
  try:
45
- # Primary DeepSeek-V3.2-Exp client with new router endpoint
46
- primary_client = InferenceClient(
47
- model="deepseek-ai/DeepSeek-V3.2-Exp",
48
- token=HF_TOKEN,
49
  base_url="https://router.huggingface.co/v1"
50
  )
51
- inference_clients.append({
52
  "name": "deepseek-v3.2-exp",
53
  "client": primary_client,
54
  "model": "deepseek-ai/DeepSeek-V3.2-Exp"
55
  })
56
 
57
- # Secondary DeepSeek-V3-Base client with new router endpoint
58
- secondary_client = InferenceClient(
59
- model="deepseek-ai/DeepSeek-V3-Base",
60
- token=HF_TOKEN,
61
  base_url="https://router.huggingface.co/v1"
62
  )
63
- inference_clients.append({
64
  "name": "deepseek-v3-base",
65
  "client": secondary_client,
66
  "model": "deepseek-ai/DeepSeek-V3-Base"
67
  })
68
 
69
- # Fallback client (same as primary) with new router endpoint
70
- fallback_client = InferenceClient(
71
- model="deepseek-ai/DeepSeek-V3.2-Exp",
72
- token=HF_TOKEN,
73
  base_url="https://router.huggingface.co/v1"
74
  )
75
- inference_clients.append({
76
  "name": "deepseek-fallback",
77
  "client": fallback_client,
78
  "model": "deepseek-ai/DeepSeek-V3.2-Exp"
79
  })
80
 
81
  except Exception as e:
82
- logger.error(f"Failed to initialize InferenceClient: {e}")
83
 
84
- # Legacy API_PROVIDERS for compatibility (now using InferenceClient)
85
  API_PROVIDERS = [
86
  {
87
  "name": "deepseek-v3.2-exp",
88
- "provider": "hf_inference_client",
89
  "model": "deepseek-ai/DeepSeek-V3.2-Exp"
90
  },
91
  {
92
  "name": "deepseek-v3-base",
93
- "provider": "hf_inference_client",
94
  "model": "deepseek-ai/DeepSeek-V3-Base"
95
  },
96
  {
97
  "name": "deepseek-fallback",
98
- "provider": "hf_inference_client",
99
  "model": "deepseek-ai/DeepSeek-V3.2-Exp"
100
  }
101
  ]
102
 
103
  def get_next_provider():
104
- """Get the next available InferenceClient for failover"""
105
  global current_provider_index
106
- if not inference_clients:
107
  return None
108
- client_info = inference_clients[current_provider_index]
109
- current_provider_index = (current_provider_index + 1) % len(inference_clients)
110
  return client_info
111
 
112
  def call_deepseek_api(messages: List[Dict], client_info: Dict, max_retries: int = 3) -> Optional[str]:
113
- """Call DeepSeek API via HuggingFace InferenceClient"""
114
  if not client_info:
115
  return None
116
 
117
  try:
118
  client = client_info["client"]
 
119
 
120
- # Convert messages to a single prompt for text_generation
121
- conversation = ""
122
- for msg in messages:
123
- if msg["role"] == "system":
124
- conversation += f"System: {msg['content']}\n\n"
125
- elif msg["role"] == "user":
126
- conversation += f"User: {msg['content']}\n\n"
127
- elif msg["role"] == "assistant":
128
- conversation += f"Assistant: {msg['content']}\n\n"
129
-
130
- conversation += "Assistant: "
131
-
132
- # Use text_generation method instead of chat_completion
133
- response = client.text_generation(
134
- prompt=conversation,
135
- max_new_tokens=1024,
136
  temperature=0.7,
137
  top_p=0.9,
138
- do_sample=True,
139
- return_full_text=False,
140
  stream=False
141
  )
142
 
143
  # Extract content from response
144
- if isinstance(response, str):
145
- content = response
146
- logger.info(f"βœ… Success with InferenceClient: {client_info['name']} ({client_info['model']})")
147
  return content.strip()
148
  else:
149
  logger.warning(f"⚠️ Unexpected response format from {client_info['name']}: {response}")
@@ -159,30 +144,38 @@ def call_deepseek_api(messages: List[Dict], client_info: Dict, max_retries: int
159
  else:
160
  logger.warning(f"⚠️ API error from {client_info['name']}: {str(e)}")
161
  return None
 
 
 
 
 
 
 
 
162
 
163
  def call_deepseek_with_failover(messages: List[Dict]) -> str:
164
- """Call DeepSeek-V3.2-Exp with automatic InferenceClient failover"""
165
- if not inference_clients:
166
- return "InferenceClient not initialized. Please check HF_TOKEN configuration."
167
 
168
  clients_tried = []
169
 
170
  # Try all clients until one succeeds
171
- for attempt in range(len(inference_clients)):
172
  client_info = get_next_provider()
173
  if not client_info:
174
  continue
175
 
176
  clients_tried.append(client_info['name'])
177
 
178
- logger.info(f"πŸ”„ Trying InferenceClient: {client_info['name']} (attempt {attempt + 1}/{len(inference_clients)})")
179
 
180
  result = call_deepseek_api(messages, client_info)
181
  if result:
182
  return result
183
 
184
  # If all clients failed
185
- logger.error(f"❌ All InferenceClients failed: {', '.join(clients_tried)}")
186
  return f"I apologize, but all API providers ({', '.join(clients_tried)}) are currently unavailable. Please try again in a moment."
187
 
188
  def format_response(text):
@@ -315,7 +308,7 @@ Provide comprehensive analysis with specific numerical values for all calculated
315
  "year": year,
316
  "analysis_timestamp": datetime.now().isoformat(),
317
  "model": MODEL_NAME,
318
- "providers": [c["name"] for c in inference_clients]
319
  }
320
 
321
  # Extract metrics from model response
@@ -371,8 +364,8 @@ def status():
371
  'model': MODEL_NAME,
372
  'version': AEGIS_VERSION,
373
  'regions': len(GLOBAL_REGIONS),
374
- 'providers': [c["name"] for c in inference_clients],
375
- 'current_provider': inference_clients[current_provider_index]["name"] if inference_clients else "none",
376
  'api_ready': True
377
  })
378
 
@@ -405,23 +398,23 @@ def chat():
405
  'provider_status': 'HF_TOKEN missing'
406
  }), 500
407
 
408
- if not inference_clients:
409
- logger.error("InferenceClients not initialized!")
410
  return jsonify({
411
- 'error': 'InferenceClients not initialized. Please check HF_TOKEN configuration.',
412
- 'provider_status': 'InferenceClients not initialized'
413
  }), 500
414
 
415
  # Generate response using AEGIS Multi-Domain System with DeepSeek-V3.2-Exp
416
  logger.info("Generating AEGIS analysis...")
417
  response = analyze_with_aegis_conductor(message, analysis_type)
418
 
419
- if not response or response.startswith("I apologize, but all API providers") or response.startswith("InferenceClient not initialized"):
420
- logger.error("All InferenceClients failed or returned empty response")
421
  return jsonify({
422
  'error': 'All API providers are currently unavailable. Please check your HF_TOKEN and try again.',
423
  'response': response,
424
- 'provider_status': 'All InferenceClients failed'
425
  }), 503
426
 
427
  logger.info(f"Successfully generated response of length: {len(response)}")
@@ -431,10 +424,10 @@ def chat():
431
  'timestamp': time.time(),
432
  'model': f"AEGIS BIO LAB {AEGIS_VERSION} CONDUCTOR (DeepSeek-V3.2-Exp)",
433
  'analysis_type': analysis_type,
434
- 'provider': f"{inference_clients[current_provider_index]['name'] if inference_clients else 'none'} (InferenceClient)",
435
- 'hf_inference_client': True,
436
  'hf_token_configured': bool(HF_TOKEN and len(HF_TOKEN) > 10),
437
- 'clients_initialized': len(inference_clients)
438
  })
439
 
440
  except Exception as e:
@@ -503,16 +496,16 @@ def diagnostic():
503
  <strong>Model:</strong> {MODEL_NAME}
504
  </div>
505
 
506
- <div class="status {'good' if inference_clients else 'bad'}">
507
- <strong>InferenceClients:</strong> {len(inference_clients)} initialized
508
  </div>
509
 
510
  <div class="status good">
511
- <strong>Current Client:</strong> {inference_clients[current_provider_index]["name"] if inference_clients else "none"}
512
  </div>
513
 
514
  <h2>πŸ”§ Configuration Instructions</h2>
515
- <p>Using HuggingFace InferenceClient (only HF_TOKEN required):</p>
516
  <ol>
517
  <li>Go to your space settings</li>
518
  <li>Click "Variables and secrets"</li>
@@ -535,10 +528,10 @@ def provider_status():
535
  """Get status of all InferenceClient providers"""
536
  provider_statuses = []
537
 
538
- for i, client_info in enumerate(inference_clients):
539
  status_info = {
540
  "name": client_info["name"],
541
- "provider_type": "hf_inference_client",
542
  "active": i == current_provider_index,
543
  "model": client_info.get("model", MODEL_NAME),
544
  "has_api_key": bool(HF_TOKEN and len(HF_TOKEN) > 10),
@@ -547,40 +540,40 @@ def provider_status():
547
  provider_statuses.append(status_info)
548
 
549
  # Count available providers
550
- available_providers = len(inference_clients) if HF_TOKEN and len(HF_TOKEN) > 10 else 0
551
 
552
  return jsonify({
553
  "providers": provider_statuses,
554
- "current_provider": inference_clients[current_provider_index]["name"] if inference_clients else "none",
555
- "current_provider_type": "hf_inference_client",
556
- "total_providers": len(inference_clients),
557
  "available_providers": available_providers,
558
  "model": MODEL_NAME,
559
  "api_keys_status": {
560
  "hf_token": bool(HF_TOKEN and len(HF_TOKEN) > 10),
561
- "note": "Using HuggingFace InferenceClient - only HF_TOKEN required"
562
  }
563
  })
564
 
565
  @app.route('/switch_provider', methods=['POST'])
566
  def switch_provider():
567
- """Manually switch to next InferenceClient provider"""
568
  global current_provider_index
569
 
570
- if not inference_clients:
571
  return jsonify({
572
- "error": "No InferenceClients available",
573
  "message": "Please check HF_TOKEN configuration"
574
  }), 500
575
 
576
- old_client = inference_clients[current_provider_index]["name"]
577
- current_provider_index = (current_provider_index + 1) % len(inference_clients)
578
- new_client = inference_clients[current_provider_index]["name"]
579
 
580
  return jsonify({
581
- "switched_from": f"{old_client} (InferenceClient)",
582
- "switched_to": f"{new_client} (InferenceClient)",
583
- "message": f"Switched from {old_client} to {new_client} InferenceClient",
584
  "model": MODEL_NAME
585
  })
586
 
@@ -593,12 +586,12 @@ def initialize_system():
593
  print(f"πŸ€— Model: {MODEL_NAME}")
594
  print(f"πŸ”— Endpoint: https://router.huggingface.co/v1")
595
 
596
- if inference_clients:
597
- client_list = ', '.join([f"{c['name']} ({c['model']})" for c in inference_clients])
598
- print(f"πŸ“‘ Available InferenceClients: {client_list}")
599
- print(f"πŸ”„ Automatic failover enabled across {len(inference_clients)} InferenceClients")
600
  else:
601
- print("❌ No InferenceClients initialized - check HF_TOKEN")
602
 
603
  print(f"🌍 Global analysis across {len(GLOBAL_REGIONS)} regions")
604
  print(f"πŸ”‘ Using HuggingFace Token: {'βœ… Valid' if HF_TOKEN and len(HF_TOKEN) > 10 else '❌ Missing'}")
 
11
  import os
12
  from dotenv import load_dotenv
13
  import random
14
+ from openai import OpenAI
15
 
16
  # Load environment variables
17
  load_dotenv()
 
38
# HuggingFace Token for all providers
HF_TOKEN = os.getenv('HF_TOKEN', '')

# Initialize OpenAI-compatible clients for DeepSeek models using HuggingFace router.
# Each entry: {"name": rotation label, "client": OpenAI instance, "model": model id}.
openai_clients = []
if HF_TOKEN:
    try:
        # One shared client is enough: all three former clients used the same
        # api_key and base_url, and the target model is chosen per request
        # (call_deepseek_api passes client_info["model"]), so constructing
        # three identical OpenAI objects was redundant.
        router_client = OpenAI(
            api_key=HF_TOKEN,
            base_url="https://router.huggingface.co/v1"
        )

        for provider_name, model_id in (
            # Primary DeepSeek-V3.2-Exp client
            ("deepseek-v3.2-exp", "deepseek-ai/DeepSeek-V3.2-Exp"),
            # Secondary DeepSeek-V3-Base client
            ("deepseek-v3-base", "deepseek-ai/DeepSeek-V3-Base"),
            # Fallback client (same model as primary)
            ("deepseek-fallback", "deepseek-ai/DeepSeek-V3.2-Exp"),
        ):
            openai_clients.append({
                "name": provider_name,
                "client": router_client,
                "model": model_id,
            })
    except Exception as e:
        logger.error(f"Failed to initialize OpenAI clients: {e}")
 
81
# Legacy API_PROVIDERS for compatibility (now using OpenAI client).
# Built from (name, model) pairs; every entry routes through the HF router.
API_PROVIDERS = [
    {"name": provider_name, "provider": "hf_router_openai", "model": model_id}
    for provider_name, model_id in (
        ("deepseek-v3.2-exp", "deepseek-ai/DeepSeek-V3.2-Exp"),
        ("deepseek-v3-base", "deepseek-ai/DeepSeek-V3-Base"),
        ("deepseek-fallback", "deepseek-ai/DeepSeek-V3.2-Exp"),
    )
]
99
 
100
def get_next_provider():
    """Get the next available OpenAI client for failover.

    Round-robin: returns the client-info dict at the current rotation
    position and advances the position, or None when no clients exist.
    """
    global current_provider_index
    total = len(openai_clients)
    if total == 0:
        return None
    chosen = openai_clients[current_provider_index]
    current_provider_index = (current_provider_index + 1) % total
    return chosen
 
109
  def call_deepseek_api(messages: List[Dict], client_info: Dict, max_retries: int = 3) -> Optional[str]:
110
+ """Call DeepSeek API via HuggingFace Router using OpenAI client"""
111
  if not client_info:
112
  return None
113
 
114
  try:
115
  client = client_info["client"]
116
+ model = client_info["model"]
117
 
118
+ # Use OpenAI client with HuggingFace router
119
+ response = client.chat.completions.create(
120
+ model=model,
121
+ messages=messages,
122
+ max_tokens=1024,
 
 
 
 
 
 
 
 
 
 
 
123
  temperature=0.7,
124
  top_p=0.9,
 
 
125
  stream=False
126
  )
127
 
128
  # Extract content from response
129
+ if response.choices and len(response.choices) > 0:
130
+ content = response.choices[0].message.content
131
+ logger.info(f"βœ… Success with OpenAI client: {client_info['name']} ({client_info['model']})")
132
  return content.strip()
133
  else:
134
  logger.warning(f"⚠️ Unexpected response format from {client_info['name']}: {response}")
 
144
  else:
145
  logger.warning(f"⚠️ API error from {client_info['name']}: {str(e)}")
146
  return None
147
+ if "rate limit" in error_msg or "429" in error_msg:
148
+ logger.warning(f"πŸ’Έ Rate limit reached for {client_info['name']}, switching to next provider...")
149
+ elif "503" in error_msg or "service unavailable" in error_msg:
150
+ logger.warning(f"⏳ Model loading for {client_info['name']}, waiting...")
151
+ time.sleep(10) # Wait for model to load
152
+ else:
153
+ logger.warning(f"⚠️ API error from {client_info['name']}: {str(e)}")
154
+ return None
155
 
156
def call_deepseek_with_failover(messages: List[Dict]) -> str:
    """Call DeepSeek-V3.2-Exp with automatic OpenAI client failover.

    Rotates through every configured client via get_next_provider() and
    returns the first non-empty result; on total failure returns a
    user-facing apology string naming the providers tried.
    """
    if not openai_clients:
        return "OpenAI clients not initialized. Please check HF_TOKEN configuration."

    clients_tried = []
    total = len(openai_clients)

    # One pass over the rotation: each attempt pulls the next client.
    for attempt in range(total):
        client_info = get_next_provider()
        if not client_info:
            continue

        provider_name = client_info['name']
        clients_tried.append(provider_name)

        logger.info(f"πŸ”„ Trying OpenAI client: {provider_name} (attempt {attempt + 1}/{total})")

        result = call_deepseek_api(messages, client_info)
        if result:
            return result

    # Every client was tried and none produced a result.
    logger.error(f"❌ All OpenAI clients failed: {', '.join(clients_tried)}")
    return f"I apologize, but all API providers ({', '.join(clients_tried)}) are currently unavailable. Please try again in a moment."
180
 
181
  def format_response(text):
 
308
  "year": year,
309
  "analysis_timestamp": datetime.now().isoformat(),
310
  "model": MODEL_NAME,
311
+ "providers": [c["name"] for c in openai_clients]
312
  }
313
 
314
  # Extract metrics from model response
 
364
  'model': MODEL_NAME,
365
  'version': AEGIS_VERSION,
366
  'regions': len(GLOBAL_REGIONS),
367
+ 'providers': [c["name"] for c in openai_clients],
368
+ 'current_provider': openai_clients[current_provider_index]["name"] if openai_clients else "none",
369
  'api_ready': True
370
  })
371
 
 
398
  'provider_status': 'HF_TOKEN missing'
399
  }), 500
400
 
401
+ if not openai_clients:
402
+ logger.error("OpenAI clients not initialized!")
403
  return jsonify({
404
+ 'error': 'OpenAI clients not initialized. Please check HF_TOKEN configuration.',
405
+ 'provider_status': 'OpenAI clients not initialized'
406
  }), 500
407
 
408
  # Generate response using AEGIS Multi-Domain System with DeepSeek-V3.2-Exp
409
  logger.info("Generating AEGIS analysis...")
410
  response = analyze_with_aegis_conductor(message, analysis_type)
411
 
412
+ if not response or response.startswith("I apologize, but all API providers") or response.startswith("OpenAI clients not initialized"):
413
+ logger.error("All OpenAI clients failed or returned empty response")
414
  return jsonify({
415
  'error': 'All API providers are currently unavailable. Please check your HF_TOKEN and try again.',
416
  'response': response,
417
+ 'provider_status': 'All OpenAI clients failed'
418
  }), 503
419
 
420
  logger.info(f"Successfully generated response of length: {len(response)}")
 
424
  'timestamp': time.time(),
425
  'model': f"AEGIS BIO LAB {AEGIS_VERSION} CONDUCTOR (DeepSeek-V3.2-Exp)",
426
  'analysis_type': analysis_type,
427
+ 'provider': f"{openai_clients[current_provider_index]['name'] if openai_clients else 'none'} (OpenAI)",
428
+ 'hf_router_openai': True,
429
  'hf_token_configured': bool(HF_TOKEN and len(HF_TOKEN) > 10),
430
+ 'clients_initialized': len(openai_clients)
431
  })
432
 
433
  except Exception as e:
 
496
  <strong>Model:</strong> {MODEL_NAME}
497
  </div>
498
 
499
+ <div class="status {'good' if openai_clients else 'bad'}">
500
+ <strong>OpenAI Clients:</strong> {len(openai_clients)} initialized
501
  </div>
502
 
503
  <div class="status good">
504
+ <strong>Current Client:</strong> {openai_clients[current_provider_index]["name"] if openai_clients else "none"}
505
  </div>
506
 
507
  <h2>πŸ”§ Configuration Instructions</h2>
508
+ <p>Using HuggingFace Router with OpenAI client (only HF_TOKEN required):</p>
509
  <ol>
510
  <li>Go to your space settings</li>
511
  <li>Click "Variables and secrets"</li>
 
528
  """Get status of all InferenceClient providers"""
529
  provider_statuses = []
530
 
531
+ for i, client_info in enumerate(openai_clients):
532
  status_info = {
533
  "name": client_info["name"],
534
+ "provider_type": "hf_router_openai",
535
  "active": i == current_provider_index,
536
  "model": client_info.get("model", MODEL_NAME),
537
  "has_api_key": bool(HF_TOKEN and len(HF_TOKEN) > 10),
 
540
  provider_statuses.append(status_info)
541
 
542
  # Count available providers
543
+ available_providers = len(openai_clients) if HF_TOKEN and len(HF_TOKEN) > 10 else 0
544
 
545
  return jsonify({
546
  "providers": provider_statuses,
547
+ "current_provider": openai_clients[current_provider_index]["name"] if openai_clients else "none",
548
+ "current_provider_type": "hf_router_openai",
549
+ "total_providers": len(openai_clients),
550
  "available_providers": available_providers,
551
  "model": MODEL_NAME,
552
  "api_keys_status": {
553
  "hf_token": bool(HF_TOKEN and len(HF_TOKEN) > 10),
554
+ "note": "Using HuggingFace Router with OpenAI client - only HF_TOKEN required"
555
  }
556
  })
557
 
558
  @app.route('/switch_provider', methods=['POST'])
559
  def switch_provider():
560
+ """Manually switch to next OpenAI client provider"""
561
  global current_provider_index
562
 
563
+ if not openai_clients:
564
  return jsonify({
565
+ "error": "No OpenAI clients available",
566
  "message": "Please check HF_TOKEN configuration"
567
  }), 500
568
 
569
+ old_client = openai_clients[current_provider_index]["name"]
570
+ current_provider_index = (current_provider_index + 1) % len(openai_clients)
571
+ new_client = openai_clients[current_provider_index]["name"]
572
 
573
  return jsonify({
574
+ "switched_from": f"{old_client} (OpenAI)",
575
+ "switched_to": f"{new_client} (OpenAI)",
576
+ "message": f"Switched from {old_client} to {new_client} OpenAI client",
577
  "model": MODEL_NAME
578
  })
579
 
 
586
  print(f"πŸ€— Model: {MODEL_NAME}")
587
  print(f"πŸ”— Endpoint: https://router.huggingface.co/v1")
588
 
589
+ if openai_clients:
590
+ client_list = ', '.join([f"{c['name']} ({c['model']})" for c in openai_clients])
591
+ print(f"πŸ“‘ Available OpenAI clients: {client_list}")
592
+ print(f"πŸ”„ Automatic failover enabled across {len(openai_clients)} OpenAI clients")
593
  else:
594
+ print("❌ No OpenAI clients initialized - check HF_TOKEN")
595
 
596
  print(f"🌍 Global analysis across {len(GLOBAL_REGIONS)} regions")
597
  print(f"πŸ”‘ Using HuggingFace Token: {'βœ… Valid' if HF_TOKEN and len(HF_TOKEN) > 10 else '❌ Missing'}")