File size: 6,175 Bytes
c2acf75 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 |
"""API client functions for LLM interactions"""
import os
import time
import requests
import hashlib
from functools import lru_cache
from typing import Optional
import logging
logger = logging.getLogger(__name__)
# Model lists
together_models = [
"Qwen/Qwen2.5-Coder-32B-Instruct",
"nvidia/Llama-3.1-Nemotron-70B-Instruct-HF",
"deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
"meta-llama/Llama-3.3-70B-Instruct-Turbo-Free"
]
anthropic_models = [
"claude-3-7-sonnet-20250219",
"claude-3-haiku-20240307",
"claude-opus-4-20250514",
"claude-sonnet-4-20250514"
]
all_models = together_models + anthropic_models
def get_api_key(provider: str) -> str:
    """Securely retrieve the API key for the specified provider.

    Args:
        provider: One of ``"together"`` or ``"anthropic"``.

    Returns:
        The key read from the provider's environment variable.

    Raises:
        ValueError: If the provider is unknown or its key is not set
            (message is deliberately vague so callers do not leak which
            environment variable is missing).
    """
    # Single source of truth for provider -> env-var name; the original
    # duplicated identical lookup-and-validate logic per provider.
    env_vars = {
        "together": "TOGETHER_API_KEY",
        "anthropic": "ANTHROPIC_API_KEY",
    }
    try:
        if provider not in env_vars:
            raise ValueError(f"Unknown provider: {provider}")
        api_key = os.getenv(env_vars[provider])
        if not api_key:
            raise ValueError("API key not configured. Please contact administrator.")
        return api_key
    except Exception as e:
        # Preserve the original contract: log every failure, then re-raise.
        logger.error(f"Error retrieving API key: {e}")
        raise
def get_provider(model: str) -> str:
    """Determine the provider for a given model.

    Args:
        model: A model identifier expected in one of the module catalogs.

    Returns:
        ``"together"`` or ``"anthropic"``.

    Raises:
        ValueError: If the model appears in neither catalog.
    """
    catalogs = (
        ("together", together_models),
        ("anthropic", anthropic_models),
    )
    for provider_name, models in catalogs:
        if model in models:
            return provider_name
    raise ValueError(f"Unknown model: {model}")
def call_api_with_retry(api_func, *args, max_retries: int = 3, timeout: int = 30, **kwargs):
    """Call an API function with retry logic and exponential backoff.

    Args:
        api_func: Callable performing the actual request.
        *args: Positional arguments forwarded to ``api_func``.
        max_retries: Total number of attempts before giving up.
        timeout: Per-request timeout in seconds, forwarded to ``api_func``
            only when it can accept a ``timeout`` keyword.
        **kwargs: Keyword arguments forwarded to ``api_func``.

    Returns:
        The callable's result on success, or a human-readable error string
        after the final failed attempt (callers surface these strings).
    """
    import inspect

    # BUG FIX: the original unconditionally injected kwargs["timeout"],
    # which raised TypeError for callables that take no such keyword --
    # including the zero-argument request closures in this module -- so
    # every call failed.  Forward the timeout only when the callable can
    # actually accept it; the check is hoisted out of the retry loop.
    parameters = inspect.signature(api_func).parameters
    accepts_timeout = "timeout" in parameters or any(
        p.kind is inspect.Parameter.VAR_KEYWORD for p in parameters.values()
    )
    if accepts_timeout:
        kwargs.setdefault("timeout", timeout)

    for attempt in range(max_retries):
        try:
            return api_func(*args, **kwargs)
        except requests.Timeout:
            if attempt == max_retries - 1:
                return "Request timed out. Please try again with a shorter input."
        except requests.ConnectionError:
            if attempt == max_retries - 1:
                return "Connection error. Please check your internet connection."
        except Exception as e:
            if attempt == max_retries - 1:
                return f"Error: {str(e)}"
        time.sleep(2 ** attempt)  # Exponential backoff between failed attempts
def call_together_api(model: str, prompt: str, temperature: float = 0.7, max_tokens: int = 1500) -> str:
    """Call Together AI API with enhanced error handling.

    Args:
        model: Together-hosted model identifier (see ``together_models``).
        prompt: User prompt to send.
        temperature: Sampling temperature.
        max_tokens: Cap on response tokens.

    Returns:
        The assistant's reply text, or an error string produced by
        ``handle_api_error`` / the retry wrapper on failure.
    """
    from utils import handle_api_error
    api_key = get_api_key("together")
    system_message = (
        "You are a Salesforce B2B Commerce expert. Be CONCISE and PRECISE. "
        "Focus on CODE QUALITY over explanations. Use structured formats when requested. "
        "Always check for syntax errors, security issues, and performance problems."
    )

    # BUG FIX: make_request now accepts the ``timeout`` keyword that
    # call_api_with_retry injects; the original zero-argument closure
    # raised TypeError on every invocation.
    def make_request(timeout: int = 30):
        headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json"
        }
        payload = {
            "model": model,
            "messages": [
                {"role": "system", "content": system_message},
                {"role": "user", "content": prompt}
            ],
            "temperature": temperature,
            "max_tokens": max_tokens,
            "top_p": 0.9
        }
        resp = requests.post(
            "https://api.together.xyz/v1/chat/completions",
            headers=headers,
            json=payload,
            timeout=timeout
        )
        if resp.status_code != 200:
            # Delegate non-200 responses to the shared error formatter.
            return handle_api_error(resp.status_code, resp.text)
        data = resp.json()
        return data["choices"][0]["message"]["content"]

    return call_api_with_retry(make_request)
def call_anthropic_api(model: str, prompt: str, temperature: float = 0.7, max_tokens: int = 1500) -> str:
    """Call Anthropic API with enhanced error handling.

    Args:
        model: Anthropic model identifier (see ``anthropic_models``).
        prompt: User prompt to send.
        temperature: Sampling temperature.
        max_tokens: Cap on response tokens.

    Returns:
        The assistant's reply text, or an error string produced by
        ``handle_api_error`` / the retry wrapper on failure.
    """
    from utils import handle_api_error
    api_key = get_api_key("anthropic")
    system_message = (
        "You are a Salesforce B2B Commerce expert. Be CONCISE and PRECISE. "
        "Focus on CODE QUALITY over explanations. Use structured formats when requested. "
        "Always check for syntax errors, security issues, and performance problems."
    )

    # BUG FIX: make_request now accepts the ``timeout`` keyword that
    # call_api_with_retry injects; the original zero-argument closure
    # raised TypeError on every invocation.
    def make_request(timeout: int = 30):
        headers = {
            "x-api-key": api_key,
            "anthropic-version": "2023-06-01",
            "content-type": "application/json"
        }
        # Anthropic takes the system prompt as a top-level field, not as
        # a "system"-role message.
        payload = {
            "model": model,
            "system": system_message,
            "messages": [
                {"role": "user", "content": prompt}
            ],
            "temperature": temperature,
            "max_tokens": max_tokens
        }
        resp = requests.post(
            "https://api.anthropic.com/v1/messages",
            headers=headers,
            json=payload,
            timeout=timeout
        )
        if resp.status_code != 200:
            # Delegate non-200 responses to the shared error formatter.
            return handle_api_error(resp.status_code, resp.text)
        data = resp.json()
        return data["content"][0]["text"]

    return call_api_with_retry(make_request)
@lru_cache(maxsize=100)
def cached_llm_call(model_hash: str, prompt_hash: str, model: str, prompt: str, temperature: float = 0.7, max_tokens: int = 1500) -> str:
    """Cached LLM call to avoid repeated API calls for identical inputs.

    All positional arguments (including the hash digests supplied by
    ``call_llm``) participate in the LRU cache key; dispatch goes to the
    provider-specific client resolved via ``get_provider``.
    """
    handlers = {
        "together": call_together_api,
        "anthropic": call_anthropic_api,
    }
    handler = handlers.get(get_provider(model))
    if handler is None:
        return f"Error: Unknown provider for model {model}"
    return handler(model, prompt, temperature, max_tokens)
def call_llm(model: str, prompt: str, temperature: float = 0.7, max_tokens: int = 1500) -> str:
    """Call LLM with caching support.

    Digests of the model name and prompt are forwarded to the cached
    helper alongside the raw values so repeated identical requests are
    served from the LRU cache instead of hitting the network.
    """
    def _digest(text: str) -> str:
        # md5 is used purely as a cache-key fingerprint, not for security.
        return hashlib.md5(text.encode()).hexdigest()

    return cached_llm_call(
        _digest(model), _digest(prompt), model, prompt, temperature, max_tokens
    )