DataEngEval

Sleeping

App Files Files Community

uparekh01151 committed on Sep 20, 2025

Commit

e65f7af

1 Parent(s): 710113d

Add Qwen3-Next-80B model with Together provider and update InferenceClient to support multiple providers

Browse files

Files changed (2) hide show

config/models.yaml +6 -24
src/models_registry.py +19 -16

config/models.yaml CHANGED Viewed

@@ -1,28 +1,10 @@
 models:
-  # Working Models - Using Hugging Face Inference API
-  - name: "BlenderBot-400M"
-    provider: "huggingface"
-    model_id: "facebook/blenderbot-400M-distill"
     params:
-      max_new_tokens: 128
       temperature: 0.1
       top_p: 0.9
-    description: "BlenderBot 400M - Conversational AI model"
-  - name: "DialoGPT-Medium"
-    provider: "huggingface"
-    model_id: "microsoft/DialoGPT-medium"
-    params:
-      max_new_tokens: 128
-      temperature: 0.1
-      top_p: 0.9
-    description: "DialoGPT Medium - Conversational model"
-  - name: "GPT-2"
-    provider: "huggingface"
-    model_id: "gpt2"
-    params:
-      max_new_tokens: 128
-      temperature: 0.1
-      top_p: 0.9
-    description: "GPT-2 model - Original transformer model"

 models:
+  # Qwen Model with Together Provider
+  - name: "Qwen3-Next-80B"
+    provider: "together"
+    model_id: "Qwen/Qwen3-Next-80B-A3B-Instruct"
     params:
+      max_new_tokens: 256
       temperature: 0.1
       top_p: 0.9
+    description: "Qwen3-Next-80B - Advanced instruction-following model via Together AI"

src/models_registry.py CHANGED Viewed

@@ -74,17 +74,19 @@ class HuggingFaceInference:
     def __init__(self, api_token: Optional[str] = None):
         self.api_token = api_token or os.getenv("HF_TOKEN")
-        # Initialize InferenceClient with correct provider
-        self.client = InferenceClient(
-            provider="hf-inference",  # Fixed: was "huggingface", now "hf-inference"
-            api_key=os.environ.get("HF_TOKEN")
-        )
-    def generate(self, model_id: str, prompt: str, params: Dict[str, Any]) -> str:
-        """Generate text using Hugging Face Inference API."""
         try:
             # Use text_generation method with correct parameters
-            result = self.client.text_generation(
                 prompt=prompt,
                 model=model_id,
                 max_new_tokens=params.get('max_new_tokens', 128),
@@ -101,15 +103,15 @@ class HuggingFaceInference:
             print(f"🔍 Debug - Full error: {error_msg}")
             if "404" in error_msg or "Not Found" in error_msg:
-                raise Exception(f"Model not found: {model_id} - Model may not be available via Inference API")
             elif "401" in error_msg or "Unauthorized" in error_msg:
-                raise Exception(f"Authentication failed - check HF_TOKEN")
             elif "503" in error_msg or "Service Unavailable" in error_msg:
-                raise Exception(f"Model {model_id} is loading, please try again in a moment")
             elif "timeout" in error_msg.lower():
-                raise Exception(f"Request timeout - model may be loading")
             else:
-                raise Exception(f"Hugging Face API error: {error_msg}")
 class ModelInterface:
@@ -168,12 +170,13 @@ class ModelInterface:
             return self._generate_mock_sql(model_config, prompt)
         try:
-            if model_config.provider == "huggingface":
-                print(f"🤗 Using Hugging Face Inference API for {model_config.name}")
                 return self.hf_interface.generate(
                     model_config.model_id,
                     prompt,
-                    model_config.params
                 )
             else:
                 raise ValueError(f"Unsupported provider: {model_config.provider}")

     def __init__(self, api_token: Optional[str] = None):
         self.api_token = api_token or os.getenv("HF_TOKEN")
+        # We'll create clients dynamically based on provider
+    def generate(self, model_id: str, prompt: str, params: Dict[str, Any], provider: str = "hf-inference") -> str:
+        """Generate text using Hugging Face Inference API with specified provider."""
         try:
+            # Create InferenceClient with the specified provider
+            client = InferenceClient(
+                provider=provider,
+                api_key=os.environ.get("HF_TOKEN")
+            )
             # Use text_generation method with correct parameters
+            result = client.text_generation(
                 prompt=prompt,
                 model=model_id,
                 max_new_tokens=params.get('max_new_tokens', 128),
             print(f"🔍 Debug - Full error: {error_msg}")
             if "404" in error_msg or "Not Found" in error_msg:
+                raise Exception(f"Model not found: {model_id} - Model may not be available via {provider} provider")
             elif "401" in error_msg or "Unauthorized" in error_msg:
+                raise Exception(f"Authentication failed - check HF_TOKEN for {provider} provider")
             elif "503" in error_msg or "Service Unavailable" in error_msg:
+                raise Exception(f"Model {model_id} is loading on {provider}, please try again in a moment")
             elif "timeout" in error_msg.lower():
+                raise Exception(f"Request timeout - model may be loading on {provider}")
             else:
+                raise Exception(f"{provider} API error: {error_msg}")
 class ModelInterface:
             return self._generate_mock_sql(model_config, prompt)
         try:
+            if model_config.provider in ["huggingface", "hf-inference", "together"]:
+                print(f"🤗 Using {model_config.provider} Inference API for {model_config.name}")
                 return self.hf_interface.generate(
                     model_config.model_id,
                     prompt,
+                    model_config.params,
+                    model_config.provider
                 )
             else:
                 raise ValueError(f"Unsupported provider: {model_config.provider}")