uparekh01151 committed on
Commit
682dc03
·
1 Parent(s): 2ec7e68

Fix Nebius provider to use chat.completions.create for conversational models

Browse files
Files changed (1) hide show
  1. src/models_registry.py +30 -11
src/models_registry.py CHANGED
@@ -85,17 +85,34 @@ class HuggingFaceInference:
85
  api_key=os.environ.get("HF_TOKEN")
86
  )
87
 
88
- # Use text_generation for all providers (simplified approach)
89
- result = client.text_generation(
90
- prompt=prompt,
91
- model=model_id,
92
- max_new_tokens=params.get('max_new_tokens', 128),
93
- temperature=params.get('temperature', 0.1),
94
- top_p=params.get('top_p', 0.9),
95
- return_full_text=False # Only return the generated part
96
- )
97
-
98
- return result
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
 
100
  except Exception as e:
101
  # Improved error handling with detailed error messages
@@ -112,6 +129,8 @@ class HuggingFaceInference:
112
  raise Exception(f"Request timeout - model may be loading on {provider}")
113
  elif "not supported for task" in error_msg:
114
  raise Exception(f"Model {model_id} task not supported by {provider} provider: {error_msg}")
 
 
115
  else:
116
  raise Exception(f"{provider} API error: {error_msg}")
117
 
 
85
  api_key=os.environ.get("HF_TOKEN")
86
  )
87
 
88
+ # Use different methods based on provider capabilities
89
+ if provider == "nebius":
90
+ # Nebius provider only supports conversational tasks, use chat completion
91
+ completion = client.chat.completions.create(
92
+ model=model_id,
93
+ messages=[
94
+ {
95
+ "role": "user",
96
+ "content": prompt
97
+ }
98
+ ],
99
+ max_tokens=params.get('max_new_tokens', 128),
100
+ temperature=params.get('temperature', 0.1),
101
+ top_p=params.get('top_p', 0.9)
102
+ )
103
+ # Extract the content from the response
104
+ return completion.choices[0].message.content
105
+ else:
106
+ # Other providers use text_generation
107
+ result = client.text_generation(
108
+ prompt=prompt,
109
+ model=model_id,
110
+ max_new_tokens=params.get('max_new_tokens', 128),
111
+ temperature=params.get('temperature', 0.1),
112
+ top_p=params.get('top_p', 0.9),
113
+ return_full_text=False # Only return the generated part
114
+ )
115
+ return result
116
 
117
  except Exception as e:
118
  # Improved error handling with detailed error messages
 
129
  raise Exception(f"Request timeout - model may be loading on {provider}")
130
  elif "not supported for task" in error_msg:
131
  raise Exception(f"Model {model_id} task not supported by {provider} provider: {error_msg}")
132
+ elif "not supported by provider" in error_msg:
133
+ raise Exception(f"Model {model_id} not supported by {provider} provider: {error_msg}")
134
  else:
135
  raise Exception(f"{provider} API error: {error_msg}")
136