Spaces:
Runtime error
Runtime error
| """ | |
| Hugging Face API Client | |
| Provides methods for interacting with HuggingFace Inference API | |
| """ | |
| import os | |
| import requests | |
| from typing import Optional, List, Dict, Any | |
| from huggingface_hub import InferenceClient, HfApi | |
| from utils import load_settings | |
| # Settings paths | |
| SETTINGS_DIR = os.path.join(os.path.dirname(__file__), 'settings') | |
| APP_SETTINGS_FILE = os.path.join(SETTINGS_DIR, 'app.json') | |
| # Get HF token from settings | |
| HF_TOKEN = load_settings(APP_SETTINGS_FILE).get('hf_token') | |
| API_BASE = "https://api-inference.huggingface.co" | |
class HuggingFaceAPI:
    """Client wrapper around the HuggingFace Inference API.

    Combines two access paths:
      * ``huggingface_hub`` clients (``InferenceClient``/``HfApi``) for chat,
        text generation and hub metadata, and
      * raw ``requests`` calls against the Inference REST endpoints for the
        task-specific helpers (images, embeddings, summaries, ...).
    """
    def __init__(self, token: str = HF_TOKEN):
        """Create the client.

        Args:
            token: HuggingFace API token; defaults to the module-level
                ``HF_TOKEN`` loaded from the app settings at import time.
        """
        self.token = token
        # Headers for the requests-based REST helpers.
        self.headers = {
            "Authorization": f"Bearer {token}",
            "Content-Type": "application/json"
        }
        # huggingface_hub clients: inference calls and hub metadata lookups.
        self.client = InferenceClient(token=token)
        self.hf_api = HfApi(token=token)
| def model_info(self, model_id: str): | |
| """Get model info using HfApi (compatible with hf.py)""" | |
| return self.hf_api.model_info(model_id) | |
| def list_models(self, **kwargs): | |
| """List models using HfApi (compatible with hf.py)""" | |
| return self.hf_api.list_models(**kwargs) | |
| def chat_completion( | |
| self, | |
| model: str, | |
| messages: List[Dict[str, str]], | |
| max_tokens: int = 500, | |
| temperature: float = 0.7, | |
| stream: bool = False | |
| ) -> Dict[str, Any]: | |
| """ | |
| Send a chat completion request to HuggingFace API using huggingface_hub. | |
| Args: | |
| model: Model ID (e.g., "meta-llama/Llama-3.2-3B-Instruct") | |
| messages: List of message dicts with 'role' and 'content' | |
| max_tokens: Maximum tokens to generate | |
| temperature: Sampling temperature (0.0 - 1.0) | |
| stream: Whether to stream the response | |
| Returns: | |
| API response as dict | |
| """ | |
| # Validate model before use | |
| validation_result = self.validate_model(model) | |
| if not validation_result["valid"]: | |
| # Try fallback models | |
| fallback_models = validation_result.get("fallback_models", []) | |
| if fallback_models: | |
| # Use the first fallback model | |
| fallback_model = fallback_models[0]["id"] | |
| print(f"Warning: Model {model} not supported. Using fallback model {fallback_model}") | |
| model = fallback_model | |
| else: | |
| raise ValueError(f"Model {model} is not supported and no fallback models available. " | |
| f"Error: {validation_result.get('error', 'Unknown error')}") | |
| try: | |
| response = self.client.chat_completion( | |
| model=model, | |
| messages=messages, | |
| max_tokens=max_tokens, | |
| temperature=temperature, | |
| stream=stream | |
| ) | |
| except Exception as e: | |
| error_str = str(e).lower() | |
| if "model_not_supported" in error_str or "not supported by any provider" in error_str: | |
| # Try fallback models | |
| fallback_models = self._find_fallback_models(model) | |
| if fallback_models: | |
| # Try each fallback model | |
| for fallback in fallback_models[:3]: | |
| try: | |
| print(f"Trying fallback model: {fallback['id']}") | |
| response = self.client.chat_completion( | |
| model=fallback['id'], | |
| messages=messages, | |
| max_tokens=max_tokens, | |
| temperature=temperature, | |
| stream=stream | |
| ) | |
| return response | |
| except: | |
| continue | |
| raise ValueError(f"Model {model} is not supported and all fallback models failed. " | |
| f"Try one of these: {', '.join([m['id'] for m in fallback_models[:3]])}") | |
| else: | |
| raise ValueError(f"Model {model} is not supported and no fallback models available.") | |
| else: | |
| raise e | |
| # Convert to dict format | |
| return { | |
| "choices": [{ | |
| "message": { | |
| "role": "assistant", | |
| "content": response.choices[0].message.content | |
| }, | |
| "finish_reason": response.choices[0].finish_reason | |
| }], | |
| "model": model, | |
| "usage": { | |
| "prompt_tokens": getattr(response.usage, "prompt_tokens", 0), | |
| "completion_tokens": getattr(response.usage, "completion_tokens", 0), | |
| "total_tokens": getattr(response.usage, "total_tokens", 0) | |
| } if response.usage else None | |
| } | |
| def validate_model(self, model_id: str) -> Dict[str, Any]: | |
| """ | |
| Validate if a model is supported and available. | |
| Args: | |
| model_id: Model ID to validate | |
| Returns: | |
| Validation result with status and fallback suggestions | |
| """ | |
| try: | |
| # Try to get model info | |
| model_info = self.hf_api.model_info(model_id) | |
| # Check if model has inference API enabled | |
| if hasattr(model_info, 'inference') and not model_info.inference: | |
| # Try to find alternative models | |
| fallback_models = self._find_fallback_models(model_id) | |
| return { | |
| "valid": False, | |
| "error": f"Model {model_id} does not have inference API enabled", | |
| "fallback_models": fallback_models, | |
| "model_info": model_info | |
| } | |
| return { | |
| "valid": True, | |
| "model_info": model_info | |
| } | |
| except Exception as e: | |
| # Check if it's an auth error | |
| error_str = str(e).lower() | |
| if "401" in error_str or "unauthorized" in error_str or "invalid username or password" in error_str: | |
| # Auth error - model might be valid but we can't check | |
| return { | |
| "valid": True, # Assume valid since we can't verify due to auth | |
| "warning": "Unable to verify model due to authentication. Assuming model is valid.", | |
| "auth_error": True | |
| } | |
| # Model not found or not supported | |
| fallback_models = self._find_fallback_models(model_id) | |
| return { | |
| "valid": False, | |
| "error": str(e), | |
| "fallback_models": fallback_models | |
| } | |
| def _find_fallback_models(self, model_id: str) -> List[Dict[str, str]]: | |
| """ | |
| Find fallback models similar to the requested model. | |
| Args: | |
| model_id: Original model ID | |
| Returns: | |
| List of fallback model suggestions | |
| """ | |
| # Extract model name parts | |
| model_parts = model_id.lower().split('/') | |
| if len(model_parts) > 1: | |
| model_name = model_parts[-1] | |
| else: | |
| model_name = model_id.lower() | |
| # Remove version numbers and common prefixes | |
| clean_name = model_name.replace('-3b', '').replace('-8b', '').replace('-70b', '') | |
| clean_name = clean_name.replace('llama', '').replace('hermes', '').strip('-') | |
| # Search for similar models | |
| try: | |
| # Search for models with similar names | |
| similar_models = self.hf_api.list_models( | |
| search=model_name, | |
| sort="downloads", | |
| direction=-1, | |
| limit=5 | |
| ) | |
| # Filter for text generation models | |
| fallbacks = [] | |
| for model in similar_models: | |
| if (hasattr(model, 'pipeline_tag') and | |
| model.pipeline_tag in ['text-generation', 'conversational', 'translation']): | |
| fallbacks.append({ | |
| "id": model.modelId, | |
| "name": getattr(model, 'author', '') + '/' + model.modelId.split('/')[-1], | |
| "downloads": getattr(model, 'downloads', 0) | |
| }) | |
| return fallbacks[:5] # Return top 5 fallbacks | |
| except: | |
| # If search fails, return some common models including translation models | |
| return [ | |
| {"id": "meta-llama/Llama-3.2-3B-Instruct", "name": "Llama 3.2 3B", "downloads": 0}, | |
| {"id": "microsoft/Phi-3-mini-4k-instruct", "name": "Phi-3 Mini", "downloads": 0}, | |
| {"id": "google/gemma-2-2b-it", "name": "Gemma 2 2B", "downloads": 0}, | |
| {"id": "Helsinki-NLP/opus-mt-en-es", "name": "English-Spanish Translator", "downloads": 0}, | |
| {"id": "Helsinki-NLP/opus-mt-en-fr", "name": "English-French Translator", "downloads": 0} | |
| ] | |
| def get_model_task_support(self, model: str) -> Dict[str, Any]: | |
| """ | |
| Get information about what tasks a model supports. | |
| Args: | |
| model: Model ID | |
| Returns: | |
| Model task support information | |
| """ | |
| # Known conversational-only models | |
| conversational_only_models = [ | |
| "meta-llama/Llama-3.2-3B-Instruct", | |
| "meta-llama/Llama-3.1-8B-Instruct", | |
| "meta-llama/Llama-3.1-70B-Instruct" | |
| ] | |
| if model in conversational_only_models: | |
| return { | |
| "supports_text_generation": False, | |
| "supports_conversational": True, | |
| "recommended_method": "chat_completion" | |
| } | |
| else: | |
| return { | |
| "supports_text_generation": True, | |
| "supports_conversational": True, | |
| "recommended_method": "text_generation_or_chat_completion" | |
| } | |
| def text_generation( | |
| self, | |
| model: str, | |
| prompt: str, | |
| max_new_tokens: int = 250, | |
| temperature: float = 0.7, | |
| top_p: float = 0.95, | |
| do_sample: bool = True | |
| ) -> Dict[str, Any]: | |
| """ | |
| Send a text generation request to HuggingFace API. | |
| Args: | |
| model: Model ID | |
| prompt: Text prompt to complete | |
| max_new_tokens: Maximum new tokens to generate | |
| temperature: Sampling temperature | |
| top_p: Nucleus sampling parameter | |
| do_sample: Whether to use sampling | |
| Returns: | |
| API response as dict | |
| """ | |
| # Validate model before use | |
| validation_result = self.validate_model(model) | |
| if not validation_result["valid"]: | |
| # Try fallback models | |
| fallback_models = validation_result.get("fallback_models", []) | |
| if fallback_models: | |
| # Use the first fallback model | |
| fallback_model = fallback_models[0]["id"] | |
| print(f"Warning: Model {model} not supported. Using fallback model {fallback_model}") | |
| model = fallback_model | |
| else: | |
| raise ValueError(f"Model {model} is not supported and no fallback models available. " | |
| f"Error: {validation_result.get('error', 'Unknown error')}") | |
| try: | |
| response = self.client.text_generation( | |
| model=model, | |
| prompt=prompt, | |
| max_new_tokens=max_new_tokens, | |
| temperature=temperature, | |
| top_p=top_p, | |
| do_sample=do_sample | |
| ) | |
| return {"generated_text": response} | |
| except Exception as e: | |
| # Check if the error is related to unsupported task | |
| error_str = str(e).lower() | |
| if "not supported for task text-generation" in error_str: | |
| raise ValueError(f"Model {model} is not supported for text-generation task. " | |
| f"This model only supports conversational tasks. " | |
| f"Please use chat_completion method instead.") | |
| elif "model_not_supported" in error_str or "not supported by any provider" in error_str: | |
| # Try fallback models | |
| fallback_models = self._find_fallback_models(model) | |
| if fallback_models: | |
| # Try each fallback model | |
| for fallback in fallback_models[:3]: | |
| try: | |
| print(f"Trying fallback model: {fallback['id']}") | |
| response = self.client.text_generation( | |
| model=fallback['id'], | |
| prompt=prompt, | |
| max_new_tokens=max_new_tokens, | |
| temperature=temperature, | |
| top_p=top_p, | |
| do_sample=do_sample | |
| ) | |
| return {"generated_text": response} | |
| except: | |
| continue | |
| raise ValueError(f"Model {model} is not supported and all fallback models failed. " | |
| f"Try one of these: {', '.join([m['id'] for m in fallback_models[:3]])}") | |
| else: | |
| raise ValueError(f"Model {model} is not supported and no fallback models available.") | |
| else: | |
| raise e | |
| def get_model_info(self, model: str) -> Dict[str, Any]: | |
| """ | |
| Get model information from HuggingFace Hub. | |
| Args: | |
| model: Model ID | |
| Returns: | |
| Model metadata dict | |
| """ | |
| url = f"https://huggingface.co/api/models/{model}" | |
| response = requests.get(url, headers=self.headers) | |
| response.raise_for_status() | |
| return response.json() | |
| def search_models( | |
| self, | |
| query: str, | |
| task: str = "text-generation", | |
| limit: int = 10 | |
| ) -> List[Dict[str, Any]]: | |
| """ | |
| Search for models on HuggingFace Hub. | |
| Args: | |
| query: Search query | |
| task: Filter by task (e.g., "text-generation", "text-classification") | |
| limit: Maximum number of results | |
| Returns: | |
| List of model metadata dicts | |
| """ | |
| url = "https://huggingface.co/api/models" | |
| params = { | |
| "search": query, | |
| "pipeline_tag": task, | |
| "limit": limit, | |
| "sort": "downloads", | |
| "direction": -1 | |
| } | |
| response = requests.get(url, headers=self.headers, params=params) | |
| response.raise_for_status() | |
| return response.json() | |
| def image_generation( | |
| self, | |
| model: str, | |
| prompt: str, | |
| negative_prompt: Optional[str] = None, | |
| num_inference_steps: int = 50 | |
| ) -> bytes: | |
| """ | |
| Generate an image using a diffusion model. | |
| Args: | |
| model: Model ID (e.g., "stabilityai/stable-diffusion-xl-base-1.0") | |
| prompt: Text prompt for image generation | |
| negative_prompt: Negative prompt (what to avoid) | |
| num_inference_steps: Number of denoising steps | |
| Returns: | |
| Image bytes | |
| """ | |
| url = f"{API_BASE}/models/{model}" | |
| payload = { | |
| "inputs": prompt, | |
| "parameters": { | |
| "num_inference_steps": num_inference_steps | |
| } | |
| } | |
| if negative_prompt: | |
| payload["parameters"]["negative_prompt"] = negative_prompt | |
| response = requests.post(url, headers=self.headers, json=payload) | |
| response.raise_for_status() | |
| return response.content | |
| def embedding( | |
| self, | |
| model: str, | |
| texts: List[str] | |
| ) -> List[List[float]]: | |
| """ | |
| Get embeddings for texts. | |
| Args: | |
| model: Model ID (e.g., "sentence-transformers/all-MiniLM-L6-v2") | |
| texts: List of texts to embed | |
| Returns: | |
| List of embedding vectors | |
| """ | |
| url = f"{API_BASE}/models/{model}" | |
| payload = { | |
| "inputs": texts | |
| } | |
| response = requests.post(url, headers=self.headers, json=payload) | |
| response.raise_for_status() | |
| return response.json() | |
| def summarization( | |
| self, | |
| model: str, | |
| text: str, | |
| max_length: int = 150, | |
| min_length: int = 30 | |
| ) -> Dict[str, Any]: | |
| """ | |
| Summarize text using a summarization model. | |
| Args: | |
| model: Model ID (e.g., "facebook/bart-large-cnn") | |
| text: Text to summarize | |
| max_length: Maximum summary length | |
| min_length: Minimum summary length | |
| Returns: | |
| API response with summary | |
| """ | |
| url = f"{API_BASE}/models/{model}" | |
| payload = { | |
| "inputs": text, | |
| "parameters": { | |
| "max_length": max_length, | |
| "min_length": min_length | |
| } | |
| } | |
| response = requests.post(url, headers=self.headers, json=payload) | |
| response.raise_for_status() | |
| return response.json() | |
| def translation( | |
| self, | |
| model: str, | |
| text: str | |
| ) -> Dict[str, Any]: | |
| url = f"{API_BASE}/models/{model}" | |
| payload = { | |
| "inputs": text | |
| } | |
| response = requests.post(url, headers=self.headers, json=payload) | |
| response.raise_for_status() | |
| return response.json() | |
| def question_answering( | |
| self, | |
| model: str, | |
| question: str, | |
| context: str | |
| ) -> Dict[str, Any]: | |
| """ | |
| Answer a question based on context. | |
| Args: | |
| model: Model ID (e.g., "deepset/roberta-base-squad2") | |
| question: The question to answer | |
| context: Context containing the answer | |
| Returns: | |
| API response with answer | |
| """ | |
| url = f"{API_BASE}/models/{model}" | |
| payload = { | |
| "inputs": { | |
| "question": question, | |
| "context": context | |
| } | |
| } | |
| response = requests.post(url, headers=self.headers, json=payload) | |
| response.raise_for_status() | |
| return response.json() | |