Spaces:

NeilDriscoll
/

InstaAutoApp_TeamDataMavericks

Sleeping

File size: 5,389 Bytes

3c3e122

"""
Insta-AutoApp LLM Client
HuggingFace Inference API wrapper with retry logic.
"""

import time
import logging
import requests
from typing import Optional

from config import (
    HF_API_TOKEN,
    HF_API_URL,
    HF_MODEL_ID,
    MAX_RETRIES,
    RETRY_DELAY,
    REQUEST_TIMEOUT
)

# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)


class LLMClientError(Exception):
    """Raised when a call made through the LLM client fails."""


class LLMClient:
    """Client for the HuggingFace Inference API with retry logic.

    Connection settings (URL, token, model id) and retry settings are read
    from the ``config`` module. ``generate`` is the public entry point; it
    returns ``None`` instead of raising when a request ultimately fails.
    """

    def __init__(self):
        self.api_url = HF_API_URL
        self.headers = {
            "Authorization": f"Bearer {HF_API_TOKEN}",
            "Content-Type": "application/json"
        }
        self.model_id = HF_MODEL_ID

        if not HF_API_TOKEN:
            logger.warning("HF_API_TOKEN not set. API calls will fail.")

    @staticmethod
    def _is_retryable_status(status_code: int) -> bool:
        """Return True if an HTTP error status is worth retrying.

        Server-side errors (5xx), request timeouts (408) and rate limiting
        (429) may succeed on a later attempt; other client errors (bad
        token, bad payload, unknown model) will fail the same way again.
        """
        return status_code >= 500 or status_code in (408, 429)

    @classmethod
    def _status_error(cls, response) -> Exception:
        """Build the LLMClientError describing an HTTP error response.

        The returned exception carries a boolean ``retryable`` attribute
        that ``generate`` uses to decide whether another attempt makes sense.
        """
        code = response.status_code
        if code == 401:
            message = "Invalid API token. Please check your HF_API_TOKEN."
        elif code == 503:
            message = "Model is loading. Please try again in a moment."
        elif code >= 500:
            message = f"Server error (HTTP {code}). Retrying..."
        else:
            message = f"Request error (HTTP {code}): {response.text}"

        error = LLMClientError(message)
        error.retryable = cls._is_retryable_status(code)
        return error

    @staticmethod
    def _extract_text(result: object) -> str:
        """Pull the generated text out of a decoded API response.

        Accepts either a non-empty list whose first item is an object with a
        ``generated_text`` string, or such an object directly.

        Raises:
            LLMClientError: If the payload has any other shape, or reports
                an ``error`` field instead of generated text.
        """
        if isinstance(result, list) and result:
            item = result[0]
        elif isinstance(result, dict):
            item = result
        else:
            raise LLMClientError(f"Unexpected response type: {type(result)}")

        # Guard against non-dict items and non-string text so a malformed
        # payload surfaces as LLMClientError rather than TypeError/AttributeError.
        if isinstance(item, dict):
            text = item.get("generated_text")
            if isinstance(text, str):
                return text.strip()
            if "error" in item:
                raise LLMClientError(f"API error: {item['error']}")

        raise LLMClientError(f"Unexpected response format: {result}")

    def _make_request(self, prompt: str, max_new_tokens: int = 1024) -> str:
        """
        Make a single request to the HuggingFace Inference API.

        Args:
            prompt: The full prompt to send to the model
            max_new_tokens: Maximum tokens to generate

        Returns:
            The generated text response, stripped of surrounding whitespace

        Raises:
            LLMClientError: If the request fails, the server answers with an
                HTTP error status, or the response body cannot be
                interpreted. Errors that cannot succeed on retry have
                ``retryable`` set to False.
        """
        payload = {
            "inputs": prompt,
            "parameters": {
                "max_new_tokens": max_new_tokens,
                "temperature": 0.7,
                "top_p": 0.9,
                "do_sample": True,
                "return_full_text": False
            }
        }

        # Keep the try body limited to the network call itself.
        try:
            response = requests.post(
                self.api_url,
                headers=self.headers,
                json=payload,
                timeout=REQUEST_TIMEOUT
            )
        except requests.exceptions.Timeout as e:
            raise LLMClientError("Request timed out. The service may be overloaded.") from e
        except requests.exceptions.ConnectionError as e:
            raise LLMClientError("Could not connect to the AI service. Please check your internet connection.") from e
        except requests.exceptions.RequestException as e:
            raise LLMClientError(f"Request failed: {str(e)}") from e

        if response.status_code >= 400:
            raise self._status_error(response)

        # Depending on the installed requests version, a non-JSON body raises
        # either a RequestException subclass or a plain ValueError; ValueError
        # is a base of both, so catching it covers every case.
        try:
            result = response.json()
        except ValueError as e:
            raise LLMClientError(f"Invalid JSON in response: {e}") from e

        return self._extract_text(result)

    def generate(self, prompt: str, max_new_tokens: int = 1024) -> Optional[str]:
        """
        Generate text with automatic retry on transient failures.

        Up to MAX_RETRIES attempts are made, sleeping RETRY_DELAY seconds
        between them. Errors marked as non-retryable (for example an invalid
        API token or a malformed request) stop the loop immediately.

        Args:
            prompt: The full prompt to send to the model
            max_new_tokens: Maximum tokens to generate

        Returns:
            The generated text, or None if the request could not be completed
        """
        last_error = None

        for attempt in range(1, MAX_RETRIES + 1):
            try:
                return self._make_request(prompt, max_new_tokens)
            except LLMClientError as e:
                last_error = e
                logger.warning(
                    "LLM request failed (attempt %d/%d): %s",
                    attempt, MAX_RETRIES, e
                )

                # Errors without the attribute are treated as transient.
                if not getattr(e, "retryable", True):
                    logger.error("LLM request aborted after a non-retryable error: %s", e)
                    return None

                # Wait before retrying (except on last attempt)
                if attempt < MAX_RETRIES:
                    time.sleep(RETRY_DELAY)

        logger.error(
            "All %d LLM request attempts failed. Last error: %s",
            MAX_RETRIES, last_error
        )
        return None

    def is_configured(self) -> bool:
        """Check if the client is properly configured with an API token."""
        return bool(HF_API_TOKEN)


# Shared client instance; stays None until get_llm_client() is first called.
_llm_client: Optional[LLMClient] = None


def get_llm_client() -> LLMClient:
    """Return the shared LLMClient, constructing it on first use."""
    global _llm_client
    if _llm_client is not None:
        return _llm_client
    _llm_client = LLMClient()
    return _llm_client