"""
Hugging Face API Client

Provides methods for interacting with the HuggingFace Inference API.
"""
import os
import requests
from typing import Optional, List, Dict, Any

from huggingface_hub import InferenceClient, HfApi

from utils import load_settings

# Settings paths
SETTINGS_DIR = os.path.join(os.path.dirname(__file__), 'settings')
APP_SETTINGS_FILE = os.path.join(SETTINGS_DIR, 'app.json')

# Get HF token from settings (read once at import time)
HF_TOKEN = load_settings(APP_SETTINGS_FILE).get('hf_token')

API_BASE = "https://api-inference.huggingface.co"

# Timeout (seconds) for direct HTTP calls — prevents a stalled server from
# hanging the caller indefinitely.
REQUEST_TIMEOUT = 60


class HuggingFaceAPI:
    """Thin client over the HuggingFace Inference API and Hub.

    Wraps ``huggingface_hub.InferenceClient`` for chat / text generation and
    falls back to raw HTTP calls against the Inference API for other tasks
    (images, embeddings, summarization, translation, QA).
    """

    def __init__(self, token: Optional[str] = HF_TOKEN):
        """Create a client.

        Args:
            token: HuggingFace API token. Defaults to the token loaded from
                the app settings file at import time (may be ``None``).
        """
        self.token = token
        self.headers = {
            "Authorization": f"Bearer {token}",
            "Content-Type": "application/json"
        }
        self.client = InferenceClient(token=token)
        self.hf_api = HfApi(token=token)

    def model_info(self, model_id: str):
        """Get model info using HfApi (compatible with hf.py)."""
        return self.hf_api.model_info(model_id)

    def list_models(self, **kwargs):
        """List models using HfApi (compatible with hf.py)."""
        return self.hf_api.list_models(**kwargs)

    def _resolve_model(self, model: str) -> str:
        """Validate *model*; return it, or the first fallback if unsupported.

        Raises:
            ValueError: If the model is invalid and no fallback exists.
        """
        validation_result = self.validate_model(model)
        if validation_result["valid"]:
            return model
        fallback_models = validation_result.get("fallback_models", [])
        if fallback_models:
            # Use the first fallback model
            fallback_model = fallback_models[0]["id"]
            print(f"Warning: Model {model} not supported. "
                  f"Using fallback model {fallback_model}")
            return fallback_model
        raise ValueError(f"Model {model} is not supported and no fallback models available. "
                         f"Error: {validation_result.get('error', 'Unknown error')}")

    def _attempt_fallbacks(self, model: str, call):
        """Try up to three fallback models with *call*; return (model_id, response).

        Args:
            model: The originally requested (unsupported) model ID.
            call: A callable taking a model ID and performing the API request.

        Raises:
            ValueError: If no fallbacks exist or every fallback also fails.
        """
        fallback_models = self._find_fallback_models(model)
        if not fallback_models:
            raise ValueError(f"Model {model} is not supported and no fallback models available.")
        for fallback in fallback_models[:3]:
            try:
                print(f"Trying fallback model: {fallback['id']}")
                return fallback['id'], call(fallback['id'])
            except Exception:
                # Best-effort: move on to the next candidate.
                continue
        raise ValueError(f"Model {model} is not supported and all fallback models failed. "
                         f"Try one of these: {', '.join([m['id'] for m in fallback_models[:3]])}")

    def chat_completion(
        self,
        model: str,
        messages: List[Dict[str, str]],
        max_tokens: int = 500,
        temperature: float = 0.7,
        stream: bool = False
    ) -> Dict[str, Any]:
        """
        Send a chat completion request to HuggingFace API using huggingface_hub.

        Args:
            model: Model ID (e.g., "meta-llama/Llama-3.2-3B-Instruct")
            messages: List of message dicts with 'role' and 'content'
            max_tokens: Maximum tokens to generate
            temperature: Sampling temperature (0.0 - 1.0)
            stream: Whether to stream the response

        Returns:
            API response as dict

        Raises:
            ValueError: If the model (and all fallbacks) are unsupported.
        """
        # Validate model before use; may transparently swap in a fallback.
        model = self._resolve_model(model)

        def _call(model_id: str):
            return self.client.chat_completion(
                model=model_id,
                messages=messages,
                max_tokens=max_tokens,
                temperature=temperature,
                stream=stream
            )

        try:
            response = _call(model)
        except Exception as e:
            error_str = str(e).lower()
            if "model_not_supported" in error_str or "not supported by any provider" in error_str:
                # Retry with fallback models; track which one actually served
                # the request so the returned dict reports the right model.
                model, response = self._attempt_fallbacks(model, _call)
            else:
                raise

        # Convert to an OpenAI-style dict format.
        # NOTE(review): with stream=True the client returns an iterator, not
        # an object with .choices — presumably callers only use stream=False.
        return {
            "choices": [{
                "message": {
                    "role": "assistant",
                    "content": response.choices[0].message.content
                },
                "finish_reason": response.choices[0].finish_reason
            }],
            "model": model,
            "usage": {
                "prompt_tokens": getattr(response.usage, "prompt_tokens", 0),
                "completion_tokens": getattr(response.usage, "completion_tokens", 0),
                "total_tokens": getattr(response.usage, "total_tokens", 0)
            } if response.usage else None
        }

    def validate_model(self, model_id: str) -> Dict[str, Any]:
        """
        Validate if a model is supported and available.

        Args:
            model_id: Model ID to validate

        Returns:
            Validation result with status and fallback suggestions
        """
        try:
            # Try to get model info
            model_info = self.hf_api.model_info(model_id)
            # Check if model has inference API enabled
            if hasattr(model_info, 'inference') and not model_info.inference:
                # Try to find alternative models
                fallback_models = self._find_fallback_models(model_id)
                return {
                    "valid": False,
                    "error": f"Model {model_id} does not have inference API enabled",
                    "fallback_models": fallback_models,
                    "model_info": model_info
                }
            return {
                "valid": True,
                "model_info": model_info
            }
        except Exception as e:
            # Check if it's an auth error
            error_str = str(e).lower()
            if "401" in error_str or "unauthorized" in error_str or "invalid username or password" in error_str:
                # Auth error - model might be valid but we can't check
                return {
                    "valid": True,  # Assume valid since we can't verify due to auth
                    "warning": "Unable to verify model due to authentication. Assuming model is valid.",
                    "auth_error": True
                }
            # Model not found or not supported
            fallback_models = self._find_fallback_models(model_id)
            return {
                "valid": False,
                "error": str(e),
                "fallback_models": fallback_models
            }

    def _find_fallback_models(self, model_id: str) -> List[Dict[str, str]]:
        """
        Find fallback models similar to the requested model.

        Args:
            model_id: Original model ID

        Returns:
            List of fallback model suggestions
        """
        # Extract the bare model name (strip the "org/" prefix if present).
        model_parts = model_id.lower().split('/')
        model_name = model_parts[-1] if len(model_parts) > 1 else model_id.lower()

        # Search for similar models
        try:
            # Search for models with similar names, most-downloaded first.
            similar_models = self.hf_api.list_models(
                search=model_name,
                sort="downloads",
                direction=-1,
                limit=5
            )
            # Filter for text generation models
            fallbacks = []
            for model in similar_models:
                if (hasattr(model, 'pipeline_tag') and
                        model.pipeline_tag in ['text-generation', 'conversational', 'translation']):
                    # Coerce a missing/None author to '' to avoid TypeError
                    # when building the display name.
                    author = getattr(model, 'author', '') or ''
                    fallbacks.append({
                        "id": model.modelId,
                        "name": author + '/' + model.modelId.split('/')[-1],
                        "downloads": getattr(model, 'downloads', 0)
                    })
            return fallbacks[:5]  # Return top 5 fallbacks
        except Exception:
            # If search fails, return some common models including translation models
            return [
                {"id": "meta-llama/Llama-3.2-3B-Instruct", "name": "Llama 3.2 3B", "downloads": 0},
                {"id": "microsoft/Phi-3-mini-4k-instruct", "name": "Phi-3 Mini", "downloads": 0},
                {"id": "google/gemma-2-2b-it", "name": "Gemma 2 2B", "downloads": 0},
                {"id": "Helsinki-NLP/opus-mt-en-es", "name": "English-Spanish Translator", "downloads": 0},
                {"id": "Helsinki-NLP/opus-mt-en-fr", "name": "English-French Translator", "downloads": 0}
            ]

    def get_model_task_support(self, model: str) -> Dict[str, Any]:
        """
        Get information about what tasks a model supports.

        Args:
            model: Model ID

        Returns:
            Model task support information
        """
        # Known conversational-only models
        conversational_only_models = [
            "meta-llama/Llama-3.2-3B-Instruct",
            "meta-llama/Llama-3.1-8B-Instruct",
            "meta-llama/Llama-3.1-70B-Instruct"
        ]
        if model in conversational_only_models:
            return {
                "supports_text_generation": False,
                "supports_conversational": True,
                "recommended_method": "chat_completion"
            }
        return {
            "supports_text_generation": True,
            "supports_conversational": True,
            "recommended_method": "text_generation_or_chat_completion"
        }

    def text_generation(
        self,
        model: str,
        prompt: str,
        max_new_tokens: int = 250,
        temperature: float = 0.7,
        top_p: float = 0.95,
        do_sample: bool = True
    ) -> Dict[str, Any]:
        """
        Send a text generation request to HuggingFace API.

        Args:
            model: Model ID
            prompt: Text prompt to complete
            max_new_tokens: Maximum new tokens to generate
            temperature: Sampling temperature
            top_p: Nucleus sampling parameter
            do_sample: Whether to use sampling

        Returns:
            API response as dict

        Raises:
            ValueError: If the model only supports chat, or it (and all
                fallbacks) are unsupported.
        """
        # Validate model before use; may transparently swap in a fallback.
        model = self._resolve_model(model)

        def _call(model_id: str):
            return self.client.text_generation(
                model=model_id,
                prompt=prompt,
                max_new_tokens=max_new_tokens,
                temperature=temperature,
                top_p=top_p,
                do_sample=do_sample
            )

        try:
            return {"generated_text": _call(model)}
        except Exception as e:
            # Check if the error is related to unsupported task
            error_str = str(e).lower()
            if "not supported for task text-generation" in error_str:
                raise ValueError(f"Model {model} is not supported for text-generation task. "
                                 f"This model only supports conversational tasks. "
                                 f"Please use chat_completion method instead.")
            if "model_not_supported" in error_str or "not supported by any provider" in error_str:
                # Try fallback models
                _, response = self._attempt_fallbacks(model, _call)
                return {"generated_text": response}
            raise

    def get_model_info(self, model: str) -> Dict[str, Any]:
        """
        Get model information from HuggingFace Hub.

        Args:
            model: Model ID

        Returns:
            Model metadata dict
        """
        url = f"https://huggingface.co/api/models/{model}"
        response = requests.get(url, headers=self.headers, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        return response.json()

    def search_models(
        self,
        query: str,
        task: str = "text-generation",
        limit: int = 10
    ) -> List[Dict[str, Any]]:
        """
        Search for models on HuggingFace Hub.

        Args:
            query: Search query
            task: Filter by task (e.g., "text-generation", "text-classification")
            limit: Maximum number of results

        Returns:
            List of model metadata dicts
        """
        url = "https://huggingface.co/api/models"
        params = {
            "search": query,
            "pipeline_tag": task,
            "limit": limit,
            "sort": "downloads",
            "direction": -1
        }
        response = requests.get(url, headers=self.headers, params=params, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        return response.json()

    def image_generation(
        self,
        model: str,
        prompt: str,
        negative_prompt: Optional[str] = None,
        num_inference_steps: int = 50
    ) -> bytes:
        """
        Generate an image using a diffusion model.

        Args:
            model: Model ID (e.g., "stabilityai/stable-diffusion-xl-base-1.0")
            prompt: Text prompt for image generation
            negative_prompt: Negative prompt (what to avoid)
            num_inference_steps: Number of denoising steps

        Returns:
            Image bytes
        """
        url = f"{API_BASE}/models/{model}"
        payload = {
            "inputs": prompt,
            "parameters": {
                "num_inference_steps": num_inference_steps
            }
        }
        if negative_prompt:
            payload["parameters"]["negative_prompt"] = negative_prompt
        response = requests.post(url, headers=self.headers, json=payload, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        return response.content

    def embedding(
        self,
        model: str,
        texts: List[str]
    ) -> List[List[float]]:
        """
        Get embeddings for texts.

        Args:
            model: Model ID (e.g., "sentence-transformers/all-MiniLM-L6-v2")
            texts: List of texts to embed

        Returns:
            List of embedding vectors
        """
        url = f"{API_BASE}/models/{model}"
        payload = {
            "inputs": texts
        }
        response = requests.post(url, headers=self.headers, json=payload, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        return response.json()

    def summarization(
        self,
        model: str,
        text: str,
        max_length: int = 150,
        min_length: int = 30
    ) -> Dict[str, Any]:
        """
        Summarize text using a summarization model.

        Args:
            model: Model ID (e.g., "facebook/bart-large-cnn")
            text: Text to summarize
            max_length: Maximum summary length
            min_length: Minimum summary length

        Returns:
            API response with summary
        """
        url = f"{API_BASE}/models/{model}"
        payload = {
            "inputs": text,
            "parameters": {
                "max_length": max_length,
                "min_length": min_length
            }
        }
        response = requests.post(url, headers=self.headers, json=payload, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        return response.json()

    def translation(
        self,
        model: str,
        text: str
    ) -> Dict[str, Any]:
        """
        Translate text using a translation model.

        Args:
            model: Model ID (e.g., "Helsinki-NLP/opus-mt-en-es")
            text: Text to translate

        Returns:
            API response with translation
        """
        url = f"{API_BASE}/models/{model}"
        payload = {
            "inputs": text
        }
        response = requests.post(url, headers=self.headers, json=payload, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        return response.json()

    def question_answering(
        self,
        model: str,
        question: str,
        context: str
    ) -> Dict[str, Any]:
        """
        Answer a question based on context.

        Args:
            model: Model ID (e.g., "deepset/roberta-base-squad2")
            question: The question to answer
            context: Context containing the answer

        Returns:
            API response with answer
        """
        url = f"{API_BASE}/models/{model}"
        payload = {
            "inputs": {
                "question": question,
                "context": context
            }
        }
        response = requests.post(url, headers=self.headers, json=payload, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        return response.json()