"""
Free LLM Providers for SPARKNET

Supports multiple FREE-tier LLM providers:

1. Groq - very fast inference, generous free tier (14,400 req/day)
2. Google Gemini - 15 req/min free
3. OpenRouter - access to many free models
4. GitHub Models - free GPT-4o and Llama access
5. HuggingFace Inference API - thousands of free models
6. Together AI - $25 free credits
7. Mistral AI - free "Experiment" plan
8. Offline mode - no API key required

SECURITY & PRIVACY CONSIDERATIONS
=================================

GDPR COMPLIANCE:
- Cloud LLM providers may process data outside the EU.
- For GDPR-sensitive workloads, use:
  1. Offline mode with local Ollama
  2. EU-hosted providers (when available)
  3. Data anonymization before API calls
- Consider data processing agreements with LLM providers.
- Implement data minimization - only send the necessary context.

DATA ISOLATION OPTIONS:
1. FULLY LOCAL (maximum privacy):
   - Use Ollama for 100% on-premise inference.
   - No data is transmitted to external services.
   - Configure: set no cloud API keys; the system falls back to offline mode.

2. HYBRID (balanced):
   - Use local Ollama for sensitive documents.
   - Use cloud LLMs for general queries.
   - Implement document classification for routing (an illustrative routing
     sketch appears near the end of this module).

3. CLOUD-ONLY (convenience):
   - All inference via cloud providers.
   - Suitable for non-sensitive/public data only.
   - Review the providers' privacy policies.

PRIVATE DEPLOYMENT NOTES:
- For enterprise deployments, run Ollama on the internal network.
- Use VPN/private endpoints for database connections.
- Enable audit logging for all LLM interactions.
- Implement rate limiting and access controls.

STREAMLIT CLOUD DEPLOYMENT:
- Store API keys in Streamlit secrets (secrets.toml); see the example below.
- Never commit secrets to version control.
- Use environment variables as a fallback.
- Enable session-based authentication.
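
Example .streamlit/secrets.toml (illustrative values only - substitute your
own keys; configuring any one provider is enough, unset keys are skipped):

    GROQ_API_KEY = "gsk_..."        # checked first by the unified provider
    GOOGLE_API_KEY = "..."          # or GEMINI_API_KEY
    OPENROUTER_API_KEY = "..."
    GITHUB_TOKEN = "..."
    TOGETHER_API_KEY = "..."
    MISTRAL_API_KEY = "..."
    HF_TOKEN = "..."                # also used for embeddings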

Author: SPARKNET Team
Project: VISTA/Horizon EU
"""

import os
import requests
from typing import Optional, Tuple, List, Dict, Any
from dataclasses import dataclass
from loguru import logger
import streamlit as st


@dataclass
class LLMResponse:
    """Normalized response returned by every provider."""
    text: str
    model: str
    provider: str
    success: bool
    error: Optional[str] = None
    usage: Optional[Dict[str, int]] = None


def get_secret(key: str, default: Optional[str] = None) -> Optional[str]:
    """Get a secret from Streamlit secrets, falling back to environment variables."""
    try:
        if hasattr(st, 'secrets') and key in st.secrets:
            return st.secrets[key]
    except Exception:
        # st.secrets raises when no secrets.toml exists; fall through to the environment.
        pass

    return os.environ.get(key, default)


class GroqProvider:
    """
    Groq - FREE tier with very fast inference.

    Free tier: 14,400 requests/day, 300+ tokens/sec
    Get free key: https://console.groq.com/keys
    """

    API_URL = "https://api.groq.com/openai/v1/chat/completions"

    MODELS = {
        "llama-3.3-70b": "llama-3.3-70b-versatile",
        "llama-3.1-8b": "llama-3.1-8b-instant",
        "mixtral": "mixtral-8x7b-32768",
        "gemma2": "gemma2-9b-it",
    }

    def __init__(self, api_key: Optional[str] = None):
        self.api_key = api_key or get_secret("GROQ_API_KEY")
        self.name = "Groq"

    @property
    def is_configured(self) -> bool:
        return bool(self.api_key)

    def generate(self, prompt: str, model: Optional[str] = None, max_tokens: int = 1024,
                 system_prompt: Optional[str] = None) -> LLMResponse:
        if not self.api_key:
            return LLMResponse("", "", self.name, False, "No Groq API key")

        model = model or self.MODELS["llama-3.1-8b"]

        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
        messages.append({"role": "user", "content": prompt})

        try:
            response = requests.post(
                self.API_URL,
                headers={
                    "Authorization": f"Bearer {self.api_key}",
                    "Content-Type": "application/json"
                },
                json={
                    "model": model,
                    "messages": messages,
                    "max_tokens": max_tokens,
                    "temperature": 0.7,
                },
                timeout=30
            )
            response.raise_for_status()
            result = response.json()

            return LLMResponse(
                text=result["choices"][0]["message"]["content"],
                model=model,
                provider=self.name,
                success=True,
                usage=result.get("usage")
            )
        except Exception as e:
            return LLMResponse("", model, self.name, False, str(e))


class GoogleGeminiProvider:
    """
    Google AI Studio (Gemini) - FREE tier.

    Free tier: ~15 requests/min, Gemini 2.0 Flash & 1.5 Pro
    Get free key: https://aistudio.google.com/apikey
    """

    API_URL = "https://generativelanguage.googleapis.com/v1beta/models"

    MODELS = {
        "gemini-2.0-flash": "gemini-2.0-flash-exp",
        "gemini-1.5-flash": "gemini-1.5-flash",
        "gemini-1.5-pro": "gemini-1.5-pro",
    }

    def __init__(self, api_key: Optional[str] = None):
        self.api_key = api_key or get_secret("GOOGLE_API_KEY") or get_secret("GEMINI_API_KEY")
        self.name = "Google Gemini"

    @property
    def is_configured(self) -> bool:
        return bool(self.api_key)

    def generate(self, prompt: str, model: Optional[str] = None, max_tokens: int = 1024,
                 system_prompt: Optional[str] = None) -> LLMResponse:
        if not self.api_key:
            return LLMResponse("", "", self.name, False, "No Google API key")

        model = model or self.MODELS["gemini-1.5-flash"]

        # No dedicated system role is used here; emulate one with a primed user/model exchange.
        contents = []
        if system_prompt:
            contents.append({"role": "user", "parts": [{"text": system_prompt}]})
            contents.append({"role": "model", "parts": [{"text": "Understood. I will follow these instructions."}]})
        contents.append({"role": "user", "parts": [{"text": prompt}]})

        try:
            url = f"{self.API_URL}/{model}:generateContent?key={self.api_key}"
            response = requests.post(
                url,
                json={
                    "contents": contents,
                    "generationConfig": {
                        "maxOutputTokens": max_tokens,
                        "temperature": 0.7,
                    }
                },
                timeout=60
            )
            response.raise_for_status()
            result = response.json()

            text = result["candidates"][0]["content"]["parts"][0]["text"]

            return LLMResponse(
                text=text,
                model=model,
                provider=self.name,
                success=True
            )
        except Exception as e:
            return LLMResponse("", model, self.name, False, str(e))


class OpenRouterProvider:
    """
    OpenRouter - access to many FREE models with a single API key.

    Free models include: Llama, Mistral, Gemma, and more
    Get free key: https://openrouter.ai/keys
    """

    API_URL = "https://openrouter.ai/api/v1/chat/completions"

    MODELS = {
        "llama-3.1-8b": "meta-llama/llama-3.1-8b-instruct:free",
        "gemma-2-9b": "google/gemma-2-9b-it:free",
        "mistral-7b": "mistralai/mistral-7b-instruct:free",
        "phi-3-mini": "microsoft/phi-3-mini-128k-instruct:free",
        "qwen-2-7b": "qwen/qwen-2-7b-instruct:free",
    }

    def __init__(self, api_key: Optional[str] = None):
        self.api_key = api_key or get_secret("OPENROUTER_API_KEY")
        self.name = "OpenRouter"

    @property
    def is_configured(self) -> bool:
        return bool(self.api_key)

    def generate(self, prompt: str, model: Optional[str] = None, max_tokens: int = 1024,
                 system_prompt: Optional[str] = None) -> LLMResponse:
        if not self.api_key:
            return LLMResponse("", "", self.name, False, "No OpenRouter API key")

        model = model or self.MODELS["llama-3.1-8b"]

        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
        messages.append({"role": "user", "content": prompt})

        try:
            response = requests.post(
                self.API_URL,
                headers={
                    "Authorization": f"Bearer {self.api_key}",
                    "Content-Type": "application/json",
                    "HTTP-Referer": "https://sparknet.streamlit.app",
                    "X-Title": "SPARKNET"
                },
                json={
                    "model": model,
                    "messages": messages,
                    "max_tokens": max_tokens,
                },
                timeout=60
            )
            response.raise_for_status()
            result = response.json()

            return LLMResponse(
                text=result["choices"][0]["message"]["content"],
                model=model,
                provider=self.name,
                success=True,
                usage=result.get("usage")
            )
        except Exception as e:
            return LLMResponse("", model, self.name, False, str(e))


class GitHubModelsProvider:
    """
    GitHub Models - FREE access to top-tier models.

    Free models: GPT-4o, Llama 3.1, Mistral, and more
    Get token: https://github.com/settings/tokens (with 'models' scope)
    """

    API_URL = "https://models.inference.ai.azure.com/chat/completions"

    MODELS = {
        "gpt-4o": "gpt-4o",
        "gpt-4o-mini": "gpt-4o-mini",
        "llama-3.1-70b": "Meta-Llama-3.1-70B-Instruct",
        "llama-3.1-8b": "Meta-Llama-3.1-8B-Instruct",
        "mistral-large": "Mistral-large",
    }

    def __init__(self, api_key: Optional[str] = None):
        self.api_key = api_key or get_secret("GITHUB_TOKEN") or get_secret("GITHUB_MODELS_TOKEN")
        self.name = "GitHub Models"

    @property
    def is_configured(self) -> bool:
        return bool(self.api_key)

    def generate(self, prompt: str, model: Optional[str] = None, max_tokens: int = 1024,
                 system_prompt: Optional[str] = None) -> LLMResponse:
        if not self.api_key:
            return LLMResponse("", "", self.name, False, "No GitHub token")

        model = model or self.MODELS["gpt-4o-mini"]

        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
        messages.append({"role": "user", "content": prompt})

        try:
            response = requests.post(
                self.API_URL,
                headers={
                    "Authorization": f"Bearer {self.api_key}",
                    "Content-Type": "application/json"
                },
                json={
                    "model": model,
                    "messages": messages,
                    "max_tokens": max_tokens,
                },
                timeout=60
            )
            response.raise_for_status()
            result = response.json()

            return LLMResponse(
                text=result["choices"][0]["message"]["content"],
                model=model,
                provider=self.name,
                success=True,
                usage=result.get("usage")
            )
        except Exception as e:
            return LLMResponse("", model, self.name, False, str(e))


class HuggingFaceProvider:
    """
    HuggingFace Inference API - FREE access to thousands of models.

    Get free token: https://huggingface.co/settings/tokens
    """

    API_URL = "https://api-inference.huggingface.co/models/"

    MODELS = {
        "zephyr-7b": "HuggingFaceH4/zephyr-7b-beta",
        "mistral-7b": "mistralai/Mistral-7B-Instruct-v0.2",
        "llama-2-7b": "meta-llama/Llama-2-7b-chat-hf",
        "flan-t5": "google/flan-t5-large",
        "embed": "sentence-transformers/all-MiniLM-L6-v2",
    }

    def __init__(self, api_key: Optional[str] = None):
        self.api_key = api_key or get_secret("HF_TOKEN") or get_secret("HUGGINGFACE_TOKEN")
        self.name = "HuggingFace"

    @property
    def is_configured(self) -> bool:
        return bool(self.api_key)

    def generate(self, prompt: str, model: Optional[str] = None, max_tokens: int = 500,
                 system_prompt: Optional[str] = None) -> LLMResponse:
        model = model or self.MODELS["zephyr-7b"]
        url = f"{self.API_URL}{model}"

        # The text-generation endpoint takes a single string, so fold the system prompt in.
        full_prompt = prompt
        if system_prompt:
            full_prompt = f"{system_prompt}\n\nUser: {prompt}\nAssistant:"

        # A token is optional here; requests without one may be rejected or heavily rate-limited.
        headers = {"Content-Type": "application/json"}
        if self.api_key:
            headers["Authorization"] = f"Bearer {self.api_key}"

        try:
            response = requests.post(
                url,
                headers=headers,
                json={
                    "inputs": full_prompt,
                    "parameters": {
                        "max_new_tokens": max_tokens,
                        "temperature": 0.7,
                        "do_sample": True,
                        "return_full_text": False,
                    },
                    "options": {"wait_for_model": True}
                },
                timeout=120
            )

            if response.status_code == 503:
                return LLMResponse("", model, self.name, False, "Model is loading, try again")

            response.raise_for_status()
            result = response.json()

            if isinstance(result, list) and len(result) > 0:
                text = result[0].get("generated_text", "")
            else:
                text = str(result)

            return LLMResponse(text=text, model=model, provider=self.name, success=True)

        except Exception as e:
            return LLMResponse("", model, self.name, False, str(e))

    def embed(self, texts: List[str], model: Optional[str] = None) -> Tuple[List[List[float]], Optional[str]]:
        """Generate embeddings."""
        model = model or self.MODELS["embed"]
        url = f"{self.API_URL}{model}"

        headers = {"Content-Type": "application/json"}
        if self.api_key:
            headers["Authorization"] = f"Bearer {self.api_key}"

        try:
            response = requests.post(
                url,
                headers=headers,
                json={"inputs": texts, "options": {"wait_for_model": True}},
                timeout=60
            )
            response.raise_for_status()
            return response.json(), None
        except Exception as e:
            return [], str(e)


class TogetherAIProvider:
    """
    Together AI - $25 in FREE credits.

    Access to Llama, Mistral, and many other models
    Get free credits: https://www.together.ai/
    """

    API_URL = "https://api.together.xyz/v1/chat/completions"

    MODELS = {
        "llama-3.1-8b": "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
        "llama-3.1-70b": "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
        "mistral-7b": "mistralai/Mistral-7B-Instruct-v0.3",
        "qwen-2-72b": "Qwen/Qwen2-72B-Instruct",
    }

    def __init__(self, api_key: Optional[str] = None):
        self.api_key = api_key or get_secret("TOGETHER_API_KEY")
        self.name = "Together AI"

    @property
    def is_configured(self) -> bool:
        return bool(self.api_key)

    def generate(self, prompt: str, model: Optional[str] = None, max_tokens: int = 1024,
                 system_prompt: Optional[str] = None) -> LLMResponse:
        if not self.api_key:
            return LLMResponse("", "", self.name, False, "No Together AI API key")

        model = model or self.MODELS["llama-3.1-8b"]

        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
        messages.append({"role": "user", "content": prompt})

        try:
            response = requests.post(
                self.API_URL,
                headers={
                    "Authorization": f"Bearer {self.api_key}",
                    "Content-Type": "application/json"
                },
                json={
                    "model": model,
                    "messages": messages,
                    "max_tokens": max_tokens,
                    "temperature": 0.7,
                },
                timeout=60
            )
            response.raise_for_status()
            result = response.json()

            return LLMResponse(
                text=result["choices"][0]["message"]["content"],
                model=model,
                provider=self.name,
                success=True,
                usage=result.get("usage")
            )
        except Exception as e:
            return LLMResponse("", model, self.name, False, str(e))


class MistralAIProvider:
    """
    Mistral AI - FREE "Experiment" plan.

    Get free access: https://console.mistral.ai/
    """

    API_URL = "https://api.mistral.ai/v1/chat/completions"

    MODELS = {
        "mistral-small": "mistral-small-latest",
        "mistral-medium": "mistral-medium-latest",
        "mistral-large": "mistral-large-latest",
        "codestral": "codestral-latest",
    }

    def __init__(self, api_key: Optional[str] = None):
        self.api_key = api_key or get_secret("MISTRAL_API_KEY")
        self.name = "Mistral AI"

    @property
    def is_configured(self) -> bool:
        return bool(self.api_key)

    def generate(self, prompt: str, model: Optional[str] = None, max_tokens: int = 1024,
                 system_prompt: Optional[str] = None) -> LLMResponse:
        if not self.api_key:
            return LLMResponse("", "", self.name, False, "No Mistral API key")

        model = model or self.MODELS["mistral-small"]

        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
        messages.append({"role": "user", "content": prompt})

        try:
            response = requests.post(
                self.API_URL,
                headers={
                    "Authorization": f"Bearer {self.api_key}",
                    "Content-Type": "application/json"
                },
                json={
                    "model": model,
                    "messages": messages,
                    "max_tokens": max_tokens,
                },
                timeout=60
            )
            response.raise_for_status()
            result = response.json()

            return LLMResponse(
                text=result["choices"][0]["message"]["content"],
                model=model,
                provider=self.name,
                success=True,
                usage=result.get("usage")
            )
        except Exception as e:
            return LLMResponse("", model, self.name, False, str(e))


class OfflineProvider:
    """
    Offline/Demo mode - no API required.

    Provides extractive responses from the supplied context for demonstration purposes.
    """

    def __init__(self):
        self.name = "Offline"

    @property
    def is_configured(self) -> bool:
        return True

    def generate(self, prompt: str, context: str = "", **kwargs) -> LLMResponse:
        if context:
            # Extractive fallback: surface the first substantial sentences of the context.
            sentences = [s.strip() for s in context.split('.') if len(s.strip()) > 20][:3]
            if sentences:
                response = f"Based on the documents: {sentences[0]}."
                if len(sentences) > 1:
                    response += f" Additionally, {sentences[1].lower()}."
            else:
                response = "I found relevant information but cannot generate a detailed response in offline mode."
        else:
            response = ("I'm running in offline demo mode. Configure a free LLM provider "
                        "(Groq, Gemini, OpenRouter, etc.) for AI-powered responses.")

        return LLMResponse(text=response, model="offline", provider=self.name, success=True)

    def embed(self, texts: List[str]) -> Tuple[List[List[float]], Optional[str]]:
        """Generate simple hash-based embeddings for demo."""
        import hashlib
        embeddings = []
        for text in texts:
            # Deterministic 384-dim pseudo-embedding: repeat the 32-byte SHA-256 digest
            # twelve times and map each byte into [-1, 1).
            hash_bytes = hashlib.sha256(text.encode()).digest()
            embedding = [((b % 200) - 100) / 100.0 for b in (hash_bytes * 12)][:384]
            embeddings.append(embedding)
        return embeddings, None


class UnifiedLLMProvider:
    """
    Unified interface for all LLM providers.

    Automatically selects the best available provider based on the configured API keys.
    Priority: Groq > Gemini > OpenRouter > GitHub > Together > Mistral > HuggingFace > Offline
    """

    def __init__(self):
        self.providers: Dict[str, Any] = {}
        self.active_provider: Optional[str] = None
        self.active_embed_provider: Optional[str] = None
        self._init_providers()

    def _init_providers(self):
        """Initialize all available providers."""

        # Order defines the selection priority.
        provider_classes = [
            ("groq", GroqProvider),
            ("gemini", GoogleGeminiProvider),
            ("openrouter", OpenRouterProvider),
            ("github", GitHubModelsProvider),
            ("together", TogetherAIProvider),
            ("mistral", MistralAIProvider),
            ("huggingface", HuggingFaceProvider),
            ("offline", OfflineProvider),
        ]

        for name, cls in provider_classes:
            try:
                provider = cls()
                self.providers[name] = provider

                # The first configured cloud provider becomes the active one.
                if provider.is_configured and not self.active_provider and name != "offline":
                    self.active_provider = name
                    logger.info(f"Active LLM provider: {provider.name}")

            except Exception as e:
                logger.warning(f"Failed to init {name}: {e}")

        if not self.active_provider:
            self.active_provider = "offline"
            logger.warning("No LLM API configured, using offline mode")

        self.active_embed_provider = "huggingface"

    def generate(self, prompt: str, provider: Optional[str] = None, **kwargs) -> LLMResponse:
        """Generate text using the specified or best available provider."""
        provider_name = provider or self.active_provider

        if provider_name and provider_name in self.providers:
            response = self.providers[provider_name].generate(prompt, **kwargs)
            if response.success:
                return response
            logger.warning(f"{provider_name} failed: {response.error}")

        # Fall back through the remaining providers before giving up.
        for name in ["groq", "gemini", "openrouter", "huggingface", "offline"]:
            if name in self.providers and name != provider_name:
                response = self.providers[name].generate(prompt, **kwargs)
                if response.success:
                    return response

        return self.providers["offline"].generate(prompt, **kwargs)

    def embed(self, texts: List[str]) -> Tuple[List[List[float]], Optional[str]]:
        """Generate embeddings."""
        if self.active_embed_provider and self.active_embed_provider in self.providers:
            provider = self.providers[self.active_embed_provider]
            if hasattr(provider, 'embed'):
                result, error = provider.embed(texts)
                if not error:
                    return result, None

        # Last resort: deterministic offline embeddings.
        return self.providers["offline"].embed(texts)

    def get_status(self) -> Dict[str, Any]:
        """Get the status of all providers."""
        status = {
            "active_llm": self.active_provider,
            "active_llm_name": self.providers[self.active_provider].name if self.active_provider else "None",
            "active_embed": self.active_embed_provider,
            "providers": {}
        }

        for name, provider in self.providers.items():
            status["providers"][name] = {
                "name": provider.name,
                "configured": provider.is_configured,
            }

        return status

    def list_available(self) -> List[str]:
        """List all configured providers."""
        return [name for name, p in self.providers.items() if p.is_configured and name != "offline"]


_llm_provider: Optional[UnifiedLLMProvider] = None


def get_llm_provider() -> UnifiedLLMProvider:
    """Get or create the unified LLM provider."""
    global _llm_provider
    if _llm_provider is None:
        _llm_provider = UnifiedLLMProvider()
    return _llm_provider
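

# Illustrative sketch of the "HYBRID" isolation option from the module docstring:
# route requests that look GDPR-sensitive to the offline provider and let everything
# else use the best configured cloud backend. The function name and the keyword
# heuristic are examples only, not part of the SPARKNET API - replace the
# classification rule (document metadata, a dedicated classifier, ...) as needed.
def route_generate(prompt: str, context: str = "",
                   sensitive_markers: Tuple[str, ...] = ("confidential", "personal data", "gdpr")) -> LLMResponse:
    """Route sensitive requests to offline/on-premise inference, others to the cloud."""
    provider = get_llm_provider()
    combined = f"{prompt}\n{context}".lower()

    if any(marker in combined for marker in sensitive_markers):
        # Sensitive: stay fully local - call the offline provider directly so no
        # fallback can ever send the document text to a cloud API.
        return provider.providers["offline"].generate(prompt, context=context)

    # Non-sensitive: use the normal provider selection and fallback chain.
    full_prompt = f"Context:\n{context}\n\nQuestion: {prompt}" if context else prompt
    return provider.generate(full_prompt)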


def generate_response(prompt: str, context: str = "", system_prompt: Optional[str] = None) -> Tuple[str, Optional[str]]:
    """
    Convenience function to generate a response.

    Args:
        prompt: User prompt
        context: Optional context from retrieved documents
        system_prompt: Optional system instruction

    Returns:
        Tuple of (response_text, error_message)
    """
    provider = get_llm_provider()

    if context:
        full_prompt = f"""Context from documents:
{context}

Question: {prompt}

Please answer based on the context provided. If the answer is not in the context, say so."""
    else:
        full_prompt = prompt

    if not system_prompt:
        system_prompt = "You are a helpful document analysis assistant. Provide accurate, concise answers based on the provided context."

    response = provider.generate(full_prompt, system_prompt=system_prompt)

    if response.success:
        return response.text, None
    else:
        return "", response.error
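

# Minimal usage sketch (runs only when the module is executed directly): print
# which providers are configured and issue one test generation. The example
# question is a placeholder.
if __name__ == "__main__":
    status = get_llm_provider().get_status()
    print(f"Active LLM provider: {status['active_llm_name']}")
    for name, info in status["providers"].items():
        print(f"  {name}: {'configured' if info['configured'] else 'not configured'}")

    answer, error = generate_response("Summarize what this document assistant can do.")
    print(answer if not error else f"Error: {error}")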