Spaces:

zxc4wewewe
/

New_folder_2

Runtime error

File size: 19,295 Bytes

621ec47

"""

Hugging Face API Client

Provides methods for interacting with HuggingFace Inference API

"""
import os
import requests
from typing import Optional, List, Dict, Any
from huggingface_hub import InferenceClient, HfApi
from utils import load_settings

# Settings paths
SETTINGS_DIR = os.path.join(os.path.dirname(__file__), 'settings')
APP_SETTINGS_FILE = os.path.join(SETTINGS_DIR, 'app.json')

# Get HF token from settings
HF_TOKEN = load_settings(APP_SETTINGS_FILE).get('hf_token')
API_BASE = "https://api-inference.huggingface.co"


class HuggingFaceAPI:
    def __init__(self, token: str = HF_TOKEN):
        self.token = token
        self.headers = {
            "Authorization": f"Bearer {token}",
            "Content-Type": "application/json"
        }
        self.client = InferenceClient(token=token)
        self.hf_api = HfApi(token=token)

    def model_info(self, model_id: str):
        """Get model info using HfApi (compatible with hf.py)"""
        return self.hf_api.model_info(model_id)

    def list_models(self, **kwargs):
        """List models using HfApi (compatible with hf.py)"""
        return self.hf_api.list_models(**kwargs)

    def chat_completion(

        self,

        model: str,

        messages: List[Dict[str, str]],

        max_tokens: int = 500,

        temperature: float = 0.7,

        stream: bool = False

    ) -> Dict[str, Any]:
        """

        Send a chat completion request to HuggingFace API using huggingface_hub.

        

        Args:

            model: Model ID (e.g., "meta-llama/Llama-3.2-3B-Instruct")

            messages: List of message dicts with 'role' and 'content'

            max_tokens: Maximum tokens to generate

            temperature: Sampling temperature (0.0 - 1.0)

            stream: Whether to stream the response

        

        Returns:

            API response as dict

        """
        # Validate model before use
        validation_result = self.validate_model(model)
        if not validation_result["valid"]:
            # Try fallback models
            fallback_models = validation_result.get("fallback_models", [])
            if fallback_models:
                # Use the first fallback model
                fallback_model = fallback_models[0]["id"]
                print(f"Warning: Model {model} not supported. Using fallback model {fallback_model}")
                model = fallback_model
            else:
                raise ValueError(f"Model {model} is not supported and no fallback models available. "
                               f"Error: {validation_result.get('error', 'Unknown error')}")

        try:
            response = self.client.chat_completion(
                model=model,
                messages=messages,
                max_tokens=max_tokens,
                temperature=temperature,
                stream=stream
            )
        except Exception as e:
            error_str = str(e).lower()
            if "model_not_supported" in error_str or "not supported by any provider" in error_str:
                # Try fallback models
                fallback_models = self._find_fallback_models(model)
                if fallback_models:
                    # Try each fallback model
                    for fallback in fallback_models[:3]:
                        try:
                            print(f"Trying fallback model: {fallback['id']}")
                            response = self.client.chat_completion(
                                model=fallback['id'],
                                messages=messages,
                                max_tokens=max_tokens,
                                temperature=temperature,
                                stream=stream
                            )
                            return response
                        except:
                            continue
                    
                    raise ValueError(f"Model {model} is not supported and all fallback models failed. "
                                   f"Try one of these: {', '.join([m['id'] for m in fallback_models[:3]])}")
                else:
                    raise ValueError(f"Model {model} is not supported and no fallback models available.")
            else:
                raise e
        
        # Convert to dict format
        return {
            "choices": [{
                "message": {
                    "role": "assistant",
                    "content": response.choices[0].message.content
                },
                "finish_reason": response.choices[0].finish_reason
            }],
            "model": model,
            "usage": {
                "prompt_tokens": getattr(response.usage, "prompt_tokens", 0),
                "completion_tokens": getattr(response.usage, "completion_tokens", 0),
                "total_tokens": getattr(response.usage, "total_tokens", 0)
            } if response.usage else None
        }

    def validate_model(self, model_id: str) -> Dict[str, Any]:
        """

        Validate if a model is supported and available.

        

        Args:

            model_id: Model ID to validate

            

        Returns:

            Validation result with status and fallback suggestions

        """
        try:
            # Try to get model info
            model_info = self.hf_api.model_info(model_id)
            
            # Check if model has inference API enabled
            if hasattr(model_info, 'inference') and not model_info.inference:
                # Try to find alternative models
                fallback_models = self._find_fallback_models(model_id)
                return {
                    "valid": False,
                    "error": f"Model {model_id} does not have inference API enabled",
                    "fallback_models": fallback_models,
                    "model_info": model_info
                }
            
            return {
                "valid": True,
                "model_info": model_info
            }
        except Exception as e:
            # Check if it's an auth error
            error_str = str(e).lower()
            if "401" in error_str or "unauthorized" in error_str or "invalid username or password" in error_str:
                # Auth error - model might be valid but we can't check
                return {
                    "valid": True,  # Assume valid since we can't verify due to auth
                    "warning": "Unable to verify model due to authentication. Assuming model is valid.",
                    "auth_error": True
                }
            
            # Model not found or not supported
            fallback_models = self._find_fallback_models(model_id)
            return {
                "valid": False,
                "error": str(e),
                "fallback_models": fallback_models
            }

    def _find_fallback_models(self, model_id: str) -> List[Dict[str, str]]:
        """

        Find fallback models similar to the requested model.

        

        Args:

            model_id: Original model ID

            

        Returns:

            List of fallback model suggestions

        """
        # Extract model name parts
        model_parts = model_id.lower().split('/')
        if len(model_parts) > 1:
            model_name = model_parts[-1]
        else:
            model_name = model_id.lower()
            
        # Remove version numbers and common prefixes
        clean_name = model_name.replace('-3b', '').replace('-8b', '').replace('-70b', '')
        clean_name = clean_name.replace('llama', '').replace('hermes', '').strip('-')
        
        # Search for similar models
        try:
            # Search for models with similar names
            similar_models = self.hf_api.list_models(
                search=model_name,
                sort="downloads",
                direction=-1,
                limit=5
            )
            
            # Filter for text generation models
            fallbacks = []
            for model in similar_models:
                if (hasattr(model, 'pipeline_tag') and 
                    model.pipeline_tag in ['text-generation', 'conversational', 'translation']):
                    fallbacks.append({
                        "id": model.modelId,
                        "name": getattr(model, 'author', '') + '/' + model.modelId.split('/')[-1],
                        "downloads": getattr(model, 'downloads', 0)
                    })
                    
            return fallbacks[:5]  # Return top 5 fallbacks
        except:
            # If search fails, return some common models including translation models
            return [
                {"id": "meta-llama/Llama-3.2-3B-Instruct", "name": "Llama 3.2 3B", "downloads": 0},
                {"id": "microsoft/Phi-3-mini-4k-instruct", "name": "Phi-3 Mini", "downloads": 0},
                {"id": "google/gemma-2-2b-it", "name": "Gemma 2 2B", "downloads": 0},
                {"id": "Helsinki-NLP/opus-mt-en-es", "name": "English-Spanish Translator", "downloads": 0},
                {"id": "Helsinki-NLP/opus-mt-en-fr", "name": "English-French Translator", "downloads": 0}
            ]

    def get_model_task_support(self, model: str) -> Dict[str, Any]:
        """

        Get information about what tasks a model supports.

        

        Args:

            model: Model ID

            

        Returns:

            Model task support information

        """
        # Known conversational-only models
        conversational_only_models = [
            "meta-llama/Llama-3.2-3B-Instruct",
            "meta-llama/Llama-3.1-8B-Instruct",
            "meta-llama/Llama-3.1-70B-Instruct"
        ]
        
        if model in conversational_only_models:
            return {
                "supports_text_generation": False,
                "supports_conversational": True,
                "recommended_method": "chat_completion"
            }
        else:
            return {
                "supports_text_generation": True,
                "supports_conversational": True,
                "recommended_method": "text_generation_or_chat_completion"
            }

    def text_generation(

        self,

        model: str,

        prompt: str,

        max_new_tokens: int = 250,

        temperature: float = 0.7,

        top_p: float = 0.95,

        do_sample: bool = True

    ) -> Dict[str, Any]:
        """

        Send a text generation request to HuggingFace API.

        

        Args:

            model: Model ID

            prompt: Text prompt to complete

            max_new_tokens: Maximum new tokens to generate

            temperature: Sampling temperature

            top_p: Nucleus sampling parameter

            do_sample: Whether to use sampling

        

        Returns:

            API response as dict

        """
        # Validate model before use
        validation_result = self.validate_model(model)
        if not validation_result["valid"]:
            # Try fallback models
            fallback_models = validation_result.get("fallback_models", [])
            if fallback_models:
                # Use the first fallback model
                fallback_model = fallback_models[0]["id"]
                print(f"Warning: Model {model} not supported. Using fallback model {fallback_model}")
                model = fallback_model
            else:
                raise ValueError(f"Model {model} is not supported and no fallback models available. "
                               f"Error: {validation_result.get('error', 'Unknown error')}")

        try:
            response = self.client.text_generation(
                model=model,
                prompt=prompt,
                max_new_tokens=max_new_tokens,
                temperature=temperature,
                top_p=top_p,
                do_sample=do_sample
            )
            return {"generated_text": response}
        except Exception as e:
            # Check if the error is related to unsupported task
            error_str = str(e).lower()
            if "not supported for task text-generation" in error_str:
                raise ValueError(f"Model {model} is not supported for text-generation task. "
                               f"This model only supports conversational tasks. "
                               f"Please use chat_completion method instead.")
            elif "model_not_supported" in error_str or "not supported by any provider" in error_str:
                # Try fallback models
                fallback_models = self._find_fallback_models(model)
                if fallback_models:
                    # Try each fallback model
                    for fallback in fallback_models[:3]:
                        try:
                            print(f"Trying fallback model: {fallback['id']}")
                            response = self.client.text_generation(
                                model=fallback['id'],
                                prompt=prompt,
                                max_new_tokens=max_new_tokens,
                                temperature=temperature,
                                top_p=top_p,
                                do_sample=do_sample
                            )
                            return {"generated_text": response}
                        except:
                            continue
                    
                    raise ValueError(f"Model {model} is not supported and all fallback models failed. "
                                   f"Try one of these: {', '.join([m['id'] for m in fallback_models[:3]])}")
                else:
                    raise ValueError(f"Model {model} is not supported and no fallback models available.")
            else:
                raise e

    def get_model_info(self, model: str) -> Dict[str, Any]:
        """

        Get model information from HuggingFace Hub.

        

        Args:

            model: Model ID

        

        Returns:

            Model metadata dict

        """
        url = f"https://huggingface.co/api/models/{model}"
        response = requests.get(url, headers=self.headers)
        response.raise_for_status()
        return response.json()

    def search_models(

        self,

        query: str,

        task: str = "text-generation",

        limit: int = 10

    ) -> List[Dict[str, Any]]:
        """

        Search for models on HuggingFace Hub.

        

        Args:

            query: Search query

            task: Filter by task (e.g., "text-generation", "text-classification")

            limit: Maximum number of results

        

        Returns:

            List of model metadata dicts

        """
        url = "https://huggingface.co/api/models"
        params = {
            "search": query,
            "pipeline_tag": task,
            "limit": limit,
            "sort": "downloads",
            "direction": -1
        }
        
        response = requests.get(url, headers=self.headers, params=params)
        response.raise_for_status()
        return response.json()

    def image_generation(

        self,

        model: str,

        prompt: str,

        negative_prompt: Optional[str] = None,

        num_inference_steps: int = 50

    ) -> bytes:
        """

        Generate an image using a diffusion model.

        

        Args:

            model: Model ID (e.g., "stabilityai/stable-diffusion-xl-base-1.0")

            prompt: Text prompt for image generation

            negative_prompt: Negative prompt (what to avoid)

            num_inference_steps: Number of denoising steps

        

        Returns:

            Image bytes

        """
        url = f"{API_BASE}/models/{model}"
        
        payload = {
            "inputs": prompt,
            "parameters": {
                "num_inference_steps": num_inference_steps
            }
        }
        
        if negative_prompt:
            payload["parameters"]["negative_prompt"] = negative_prompt
        
        response = requests.post(url, headers=self.headers, json=payload)
        response.raise_for_status()
        return response.content

    def embedding(

        self,

        model: str,

        texts: List[str]

    ) -> List[List[float]]:
        """

        Get embeddings for texts.

        

        Args:

            model: Model ID (e.g., "sentence-transformers/all-MiniLM-L6-v2")

            texts: List of texts to embed

        

        Returns:

            List of embedding vectors

        """
        url = f"{API_BASE}/models/{model}"
        
        payload = {
            "inputs": texts
        }
        
        response = requests.post(url, headers=self.headers, json=payload)
        response.raise_for_status()
        return response.json()

    def summarization(

        self,

        model: str,

        text: str,

        max_length: int = 150,

        min_length: int = 30

    ) -> Dict[str, Any]:
        """

        Summarize text using a summarization model.

        

        Args:

            model: Model ID (e.g., "facebook/bart-large-cnn")

            text: Text to summarize

            max_length: Maximum summary length

            min_length: Minimum summary length

        

        Returns:

            API response with summary

        """
        url = f"{API_BASE}/models/{model}"
        
        payload = {
            "inputs": text,
            "parameters": {
                "max_length": max_length,
                "min_length": min_length
            }
        }
        
        response = requests.post(url, headers=self.headers, json=payload)
        response.raise_for_status()
        return response.json()

    def translation(

        self,

        model: str,

        text: str

    ) -> Dict[str, Any]:
        url = f"{API_BASE}/models/{model}"
        
        payload = {
            "inputs": text
        }
        
        response = requests.post(url, headers=self.headers, json=payload)
        response.raise_for_status()
        return response.json()

    def question_answering(

        self,

        model: str,

        question: str,

        context: str

    ) -> Dict[str, Any]:
        """

        Answer a question based on context.

        

        Args:

            model: Model ID (e.g., "deepset/roberta-base-squad2")

            question: The question to answer

            context: Context containing the answer

        

        Returns:

            API response with answer

        """
        url = f"{API_BASE}/models/{model}"
        
        payload = {
            "inputs": {
                "question": question,
                "context": context
            }
        }
        
        response = requests.post(url, headers=self.headers, json=payload)
        response.raise_for_status()
        return response.json()