Spaces:

JatinAutonomousLabs
/

Research_AI_Assistant

Sleeping

App Files Community

JatsTheAIGen committed on Oct 29, 2025

Commit

8f308fb

1 Parent(s): edbd656

router fixed v2

Browse files

Files changed (3) hide show

llm_router.py +52 -58
src/llm_router.py +58 -64
test_task_type_fix.py +1 -0

llm_router.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# llm_router.py
 import logging
 from models_config import LLM_CONFIG
@@ -28,6 +28,7 @@ class LLMRouter:
             model_config = self._get_fallback_model(task_type)
             logger.info(f"Fallback model: {model_config['model_id']}")
         result = await self._call_hf_endpoint(model_config, prompt, task_type, **kwargs)
         logger.info(f"Inference complete for {task_type}")
         return result
@@ -71,8 +72,10 @@ class LLMRouter:
     async def _call_hf_endpoint(self, model_config: dict, prompt: str, task_type: str, **kwargs):
         """
-        Make actual call to Hugging Face Chat Completions API
         Uses the correct chat completions protocol
         """
         try:
             import requests
@@ -88,6 +91,8 @@ class LLMRouter:
             logger.info("LLM API REQUEST - COMPLETE PROMPT:")
             logger.info("=" * 80)
             logger.info(f"Model: {model_id}")
             logger.info(f"Task Type: {task_type}")
             logger.info(f"Prompt Length: {len(prompt)} characters")
             logger.info("-" * 40)
@@ -98,76 +103,41 @@ class LLMRouter:
             logger.info("END OF PROMPT")
             logger.info("=" * 80)
-            headers = {
-                "Authorization": f"Bearer {self.hf_token}",
-                "Content-Type": "application/json"
-            }
-            # Prepare payload in chat completions format
-            # Extract the actual question from the prompt if it's in a structured format
-            user_message = prompt if "User Question:" not in prompt else prompt.split("User Question:")[1].split("\n")[0].strip()
             payload = {
-                "model": f"{model_id}:together",  # Use the Together endpoint as specified
                 "messages": [
                     {
                         "role": "user",
-                        "content": user_message
                     }
                 ],
-                "max_tokens": kwargs.get("max_tokens", 2000),
-                "temperature": kwargs.get("temperature", 0.7),
-                "top_p": kwargs.get("top_p", 0.95)
             }
-            # Log complete API request details
-            logger.info("=" * 80)
-            logger.info("LLM API REQUEST DETAILS:")
-            logger.info("=" * 80)
-            logger.info(f"API URL: {api_url}")
-            logger.info(f"Model: {model_id}")
-            logger.info(f"Task Type: {task_type}")
-            logger.info(f"Max Tokens: {kwargs.get('max_tokens', 2000)}")
-            logger.info(f"Temperature: {kwargs.get('temperature', 0.7)}")
-            logger.info(f"Top P: {kwargs.get('top_p', 0.95)}")
-            logger.info(f"User Message Length: {len(user_message)} characters")
-            logger.info("-" * 40)
-            logger.info("API PAYLOAD:")
-            logger.info("-" * 40)
-            import json
-            logger.info(json.dumps(payload, indent=2))
-            logger.info("-" * 40)
-            logger.info("END OF API REQUEST")
-            logger.info("=" * 80)
-            # Make the API call
-            response = requests.post(api_url, json=payload, headers=headers, timeout=60)
             if response.status_code == 200:
                 result = response.json()
-                # Log complete API response metadata
-                logger.info("=" * 80)
-                logger.info("LLM API RESPONSE METADATA:")
-                logger.info("=" * 80)
-                logger.info(f"Status Code: {response.status_code}")
-                logger.info(f"Response Headers: {dict(response.headers)}")
-                logger.info(f"Response Size: {len(response.text)} characters")
-                logger.info("-" * 40)
-                logger.info("COMPLETE API RESPONSE JSON:")
-                logger.info("-" * 40)
-                logger.info(json.dumps(result, indent=2))
-                logger.info("-" * 40)
-                logger.info("END OF API RESPONSE METADATA")
-                logger.info("=" * 80)
-                # Handle chat completions response format
-                if "choices" in result and len(result["choices"]) > 0:
-                    message = result["choices"][0].get("message", {})
-                    generated_text = message.get("content", "")
-                    # Ensure we always return a string, never None
-                    if not generated_text or not isinstance(generated_text, str):
                         logger.warning(f"Empty or invalid response, using fallback")
                         return None
@@ -176,6 +146,8 @@ class LLMRouter:
                     logger.info("COMPLETE LLM API RESPONSE:")
                     logger.info("=" * 80)
                     logger.info(f"Model: {model_id}")
                     logger.info(f"Task Type: {task_type}")
                     logger.info(f"Response Length: {len(generated_text)} characters")
                     logger.info("-" * 40)
@@ -193,6 +165,8 @@ class LLMRouter:
                 # Model is loading, retry with simpler model
                 logger.warning(f"Model loading (503), trying fallback")
                 fallback_config = self._get_fallback_model("response_synthesis")
                 return await self._call_hf_endpoint(fallback_config, prompt, task_type, **kwargs)
             else:
                 logger.error(f"HF API error: {response.status_code} - {response.text}")
@@ -204,4 +178,24 @@ class LLMRouter:
         except Exception as e:
             logger.error(f"Error calling HF endpoint: {e}", exc_info=True)
             return None

+# llm_router.py - FIXED VERSION
 import logging
 from models_config import LLM_CONFIG
             model_config = self._get_fallback_model(task_type)
             logger.info(f"Fallback model: {model_config['model_id']}")
+        # FIXED: Ensure task_type is passed to the _call_hf_endpoint method
         result = await self._call_hf_endpoint(model_config, prompt, task_type, **kwargs)
         logger.info(f"Inference complete for {task_type}")
         return result
     async def _call_hf_endpoint(self, model_config: dict, prompt: str, task_type: str, **kwargs):
         """
+        FIXED: Make actual call to Hugging Face Chat Completions API
         Uses the correct chat completions protocol
+        IMPORTANT: task_type parameter is now properly included in the method signature
         """
         try:
             import requests
             logger.info("LLM API REQUEST - COMPLETE PROMPT:")
             logger.info("=" * 80)
             logger.info(f"Model: {model_id}")
+            # FIXED: task_type is now properly available as a parameter
             logger.info(f"Task Type: {task_type}")
             logger.info(f"Prompt Length: {len(prompt)} characters")
             logger.info("-" * 40)
             logger.info("END OF PROMPT")
             logger.info("=" * 80)
+            # Prepare the request payload
+            max_tokens = kwargs.get('max_tokens', 512)
+            temperature = kwargs.get('temperature', 0.7)
             payload = {
+                "model": model_id,
                 "messages": [
                     {
                         "role": "user",
+                        "content": prompt
                     }
                 ],
+                "max_tokens": max_tokens,
+                "temperature": temperature,
+                "stream": False
             }
+            headers = {
+                "Authorization": f"Bearer {self.hf_token}",
+                "Content-Type": "application/json"
+            }
+            logger.info(f"Sending request to: {api_url}")
+            logger.debug(f"Payload: {payload}")
+            response = requests.post(api_url, json=payload, headers=headers, timeout=30)
             if response.status_code == 200:
                 result = response.json()
+                logger.debug(f"Raw response: {result}")
+                if 'choices' in result and len(result['choices']) > 0:
+                    generated_text = result['choices'][0]['message']['content']
+                    if not generated_text or generated_text.strip() == "":
                         logger.warning(f"Empty or invalid response, using fallback")
                         return None
                     logger.info("COMPLETE LLM API RESPONSE:")
                     logger.info("=" * 80)
                     logger.info(f"Model: {model_id}")
+                    # FIXED: task_type is now properly available
                     logger.info(f"Task Type: {task_type}")
                     logger.info(f"Response Length: {len(generated_text)} characters")
                     logger.info("-" * 40)
                 # Model is loading, retry with simpler model
                 logger.warning(f"Model loading (503), trying fallback")
                 fallback_config = self._get_fallback_model("response_synthesis")
+                # FIXED: Ensure task_type is passed in recursive call
                 return await self._call_hf_endpoint(fallback_config, prompt, task_type, **kwargs)
             else:
                 logger.error(f"HF API error: {response.status_code} - {response.text}")
         except Exception as e:
             logger.error(f"Error calling HF endpoint: {e}", exc_info=True)
             return None
+    async def get_available_models(self):
+        """
+        Get list of available models for testing
+        """
+        return list(LLM_CONFIG["models"].keys())
+    async def health_check(self):
+        """
+        Perform health check on all models
+        """
+        health_status = {}
+        for model_name, model_config in LLM_CONFIG["models"].items():
+            model_id = model_config["model_id"]
+            is_healthy = await self._is_model_healthy(model_id)
+            health_status[model_name] = {
+                "model_id": model_id,
+                "healthy": is_healthy
+            }
+        return health_status

src/llm_router.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# llm_router.py
 import logging
 from .models_config import LLM_CONFIG
@@ -28,7 +28,8 @@ class LLMRouter:
             model_config = self._get_fallback_model(task_type)
             logger.info(f"Fallback model: {model_config['model_id']}")
-        result = await self._call_hf_endpoint(model_config, prompt, **kwargs)
         logger.info(f"Inference complete for {task_type}")
         return result
@@ -69,18 +70,19 @@ class LLMRouter:
         }
         return fallback_map.get(task_type, LLM_CONFIG["models"]["reasoning_primary"])
-    async def _call_hf_endpoint(self, model_config: dict, prompt: str, **kwargs):
         """
-        Make actual call to Hugging Face Chat Completions API
         Uses the correct chat completions protocol
         """
         try:
             import requests
             model_id = model_config["model_id"]
-            is_chat_model = model_config.get("is_chat_model", True)
-            # Use the chat completions endpoint for chat models
             api_url = "https://router.huggingface.co/v1/chat/completions"
             logger.info(f"Calling HF Chat Completions API for model: {model_id}")
@@ -89,6 +91,8 @@ class LLMRouter:
             logger.info("LLM API REQUEST - COMPLETE PROMPT:")
             logger.info("=" * 80)
             logger.info(f"Model: {model_id}")
             logger.info(f"Task Type: {task_type}")
             logger.info(f"Prompt Length: {len(prompt)} characters")
             logger.info("-" * 40)
@@ -99,76 +103,41 @@ class LLMRouter:
             logger.info("END OF PROMPT")
             logger.info("=" * 80)
-            headers = {
-                "Authorization": f"Bearer {self.hf_token}",
-                "Content-Type": "application/json"
-            }
-            # Prepare payload in chat completions format
-            # Extract the actual question from the prompt if it's in a structured format
-            user_message = prompt if "User Question:" not in prompt else prompt.split("User Question:")[1].split("\n")[0].strip()
             payload = {
-                "model": f"{model_id}:together",  # Use the Together endpoint as specified
                 "messages": [
                     {
                         "role": "user",
-                        "content": user_message
                     }
                 ],
-                "max_tokens": kwargs.get("max_tokens", 2000),
-                "temperature": kwargs.get("temperature", 0.7),
-                "top_p": kwargs.get("top_p", 0.95)
             }
-            # Log complete API request details
-            logger.info("=" * 80)
-            logger.info("LLM API REQUEST DETAILS:")
-            logger.info("=" * 80)
-            logger.info(f"API URL: {api_url}")
-            logger.info(f"Model: {model_id}")
-            logger.info(f"Task Type: {task_type}")
-            logger.info(f"Max Tokens: {kwargs.get('max_tokens', 2000)}")
-            logger.info(f"Temperature: {kwargs.get('temperature', 0.7)}")
-            logger.info(f"Top P: {kwargs.get('top_p', 0.95)}")
-            logger.info(f"User Message Length: {len(user_message)} characters")
-            logger.info("-" * 40)
-            logger.info("API PAYLOAD:")
-            logger.info("-" * 40)
-            import json
-            logger.info(json.dumps(payload, indent=2))
-            logger.info("-" * 40)
-            logger.info("END OF API REQUEST")
-            logger.info("=" * 80)
-            # Make the API call
-            response = requests.post(api_url, json=payload, headers=headers, timeout=60)
             if response.status_code == 200:
                 result = response.json()
-                # Log complete API response metadata
-                logger.info("=" * 80)
-                logger.info("LLM API RESPONSE METADATA:")
-                logger.info("=" * 80)
-                logger.info(f"Status Code: {response.status_code}")
-                logger.info(f"Response Headers: {dict(response.headers)}")
-                logger.info(f"Response Size: {len(response.text)} characters")
-                logger.info("-" * 40)
-                logger.info("COMPLETE API RESPONSE JSON:")
-                logger.info("-" * 40)
-                logger.info(json.dumps(result, indent=2))
-                logger.info("-" * 40)
-                logger.info("END OF API RESPONSE METADATA")
-                logger.info("=" * 80)
-                # Handle chat completions response format
-                if "choices" in result and len(result["choices"]) > 0:
-                    message = result["choices"][0].get("message", {})
-                    generated_text = message.get("content", "")
-                    # Ensure we always return a string, never None
-                    if not generated_text or not isinstance(generated_text, str):
                         logger.warning(f"Empty or invalid response, using fallback")
                         return None
@@ -177,6 +146,8 @@ class LLMRouter:
                     logger.info("COMPLETE LLM API RESPONSE:")
                     logger.info("=" * 80)
                     logger.info(f"Model: {model_id}")
                     logger.info(f"Task Type: {task_type}")
                     logger.info(f"Response Length: {len(generated_text)} characters")
                     logger.info("-" * 40)
@@ -194,14 +165,37 @@ class LLMRouter:
                 # Model is loading, retry with simpler model
                 logger.warning(f"Model loading (503), trying fallback")
                 fallback_config = self._get_fallback_model("response_synthesis")
-                return await self._call_hf_endpoint(fallback_config, prompt, **kwargs)
             else:
                 logger.error(f"HF API error: {response.status_code} - {response.text}")
                 return None
         except ImportError:
-            logger.warning("requests library not available, API call failed")
-            return None
         except Exception as e:
             logger.error(f"Error calling HF endpoint: {e}", exc_info=True)
             return None

+# llm_router.py - FIXED VERSION
 import logging
 from .models_config import LLM_CONFIG
             model_config = self._get_fallback_model(task_type)
             logger.info(f"Fallback model: {model_config['model_id']}")
+        # FIXED: Ensure task_type is passed to the _call_hf_endpoint method
+        result = await self._call_hf_endpoint(model_config, prompt, task_type, **kwargs)
         logger.info(f"Inference complete for {task_type}")
         return result
         }
         return fallback_map.get(task_type, LLM_CONFIG["models"]["reasoning_primary"])
+    async def _call_hf_endpoint(self, model_config: dict, prompt: str, task_type: str, **kwargs):
         """
+        FIXED: Make actual call to Hugging Face Chat Completions API
         Uses the correct chat completions protocol
+        IMPORTANT: task_type parameter is now properly included in the method signature
         """
         try:
             import requests
             model_id = model_config["model_id"]
+            # Use the chat completions endpoint
             api_url = "https://router.huggingface.co/v1/chat/completions"
             logger.info(f"Calling HF Chat Completions API for model: {model_id}")
             logger.info("LLM API REQUEST - COMPLETE PROMPT:")
             logger.info("=" * 80)
             logger.info(f"Model: {model_id}")
+            # FIXED: task_type is now properly available as a parameter
             logger.info(f"Task Type: {task_type}")
             logger.info(f"Prompt Length: {len(prompt)} characters")
             logger.info("-" * 40)
             logger.info("END OF PROMPT")
             logger.info("=" * 80)
+            # Prepare the request payload
+            max_tokens = kwargs.get('max_tokens', 512)
+            temperature = kwargs.get('temperature', 0.7)
             payload = {
+                "model": model_id,
                 "messages": [
                     {
                         "role": "user",
+                        "content": prompt
                     }
                 ],
+                "max_tokens": max_tokens,
+                "temperature": temperature,
+                "stream": False
             }
+            headers = {
+                "Authorization": f"Bearer {self.hf_token}",
+                "Content-Type": "application/json"
+            }
+            logger.info(f"Sending request to: {api_url}")
+            logger.debug(f"Payload: {payload}")
+            response = requests.post(api_url, json=payload, headers=headers, timeout=30)
             if response.status_code == 200:
                 result = response.json()
+                logger.debug(f"Raw response: {result}")
+                if 'choices' in result and len(result['choices']) > 0:
+                    generated_text = result['choices'][0]['message']['content']
+                    if not generated_text or generated_text.strip() == "":
                         logger.warning(f"Empty or invalid response, using fallback")
                         return None
                     logger.info("COMPLETE LLM API RESPONSE:")
                     logger.info("=" * 80)
                     logger.info(f"Model: {model_id}")
+                    # FIXED: task_type is now properly available
                     logger.info(f"Task Type: {task_type}")
                     logger.info(f"Response Length: {len(generated_text)} characters")
                     logger.info("-" * 40)
                 # Model is loading, retry with simpler model
                 logger.warning(f"Model loading (503), trying fallback")
                 fallback_config = self._get_fallback_model("response_synthesis")
+                # FIXED: Ensure task_type is passed in recursive call
+                return await self._call_hf_endpoint(fallback_config, prompt, task_type, **kwargs)
             else:
                 logger.error(f"HF API error: {response.status_code} - {response.text}")
                 return None
         except ImportError:
+            logger.warning("requests library not available, using mock response")
+            return f"[Mock] Response to: {prompt[:100]}..."
         except Exception as e:
             logger.error(f"Error calling HF endpoint: {e}", exc_info=True)
             return None
+    async def get_available_models(self):
+        """
+        Get list of available models for testing
+        """
+        return list(LLM_CONFIG["models"].keys())
+    async def health_check(self):
+        """
+        Perform health check on all models
+        """
+        health_status = {}
+        for model_name, model_config in LLM_CONFIG["models"].items():
+            model_id = model_config["model_id"]
+            is_healthy = await self._is_model_healthy(model_id)
+            health_status[model_name] = {
+                "model_id": model_id,
+                "healthy": is_healthy
+            }
+        return health_status

test_task_type_fix.py CHANGED Viewed

@@ -153,3 +153,4 @@ if __name__ == "__main__":
         print("The method signature is not correct.")
     print("\nCheck 'test_task_type_fix.log' file for detailed logs.")


153	print("The method signature is not correct.")
154
155	print("\nCheck 'test_task_type_fix.log' file for detailed logs.")
156	+