"""
Free LLM Providers for SPARKNET
Supports multiple FREE-tier LLM providers:
1. Groq - Very fast, generous free tier (14,400 req/day)
2. Google Gemini - 15 req/min free
3. OpenRouter - Access to many free models
4. GitHub Models - Free GPT-4o, Llama access
5. HuggingFace Inference API - Thousands of free models
6. Together AI - $25 free credits
7. Mistral AI - Free experiment plan
8. Offline mode - No API required
SECURITY & PRIVACY CONSIDERATIONS
==================================
GDPR COMPLIANCE:
- Cloud LLM providers may process data outside the EU
- For GDPR-sensitive workloads, use:
1. Offline mode with local Ollama
2. EU-hosted providers (when available)
3. Data anonymization before API calls
- Consider data processing agreements with LLM providers
- Implement data minimization - only send necessary context
DATA ISOLATION OPTIONS:
1. FULLY LOCAL (Maximum Privacy):
- Use Ollama for 100% on-premise inference
- No data transmitted to external services
- Configure: leave all cloud API keys unset; the system then falls back to offline mode
2. HYBRID (Balanced):
- Use local Ollama for sensitive documents
- Use cloud LLMs for general queries
- Implement document classification for routing
3. CLOUD-ONLY (Convenience):
- All inference via cloud providers
- Suitable for non-sensitive/public data
- Review provider privacy policies
PRIVATE DEPLOYMENT NOTES:
- For enterprise deployments, configure Ollama on internal network
- Use VPN/private endpoints for database connections
- Enable audit logging for all LLM interactions
- Implement rate limiting and access controls
STREAMLIT CLOUD DEPLOYMENT:
- Store API keys in Streamlit secrets (secrets.toml)
- Never commit secrets to version control
- Use environment variables as fallback
- Enable session-based authentication
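QUICK START (sketch; the import path is assumed from this file's location):
    from llm_providers import get_llm_provider, generate_response
    provider = get_llm_provider()   # auto-selects the first configured provider
    text, error = generate_response("Summarize the uploaded documents.")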
Author: SPARKNET Team
Project: VISTA/Horizon EU
"""
import os
import requests
from typing import Optional, Tuple, List, Dict, Any
from dataclasses import dataclass
from loguru import logger
import streamlit as st
@dataclass
class LLMResponse:
    """Normalized response object returned by every provider."""
    text: str
model: str
provider: str
success: bool
error: Optional[str] = None
usage: Optional[Dict[str, int]] = None
def get_secret(key: str, default: Optional[str] = None) -> Optional[str]:
"""Get secret from Streamlit secrets or environment."""
# Try Streamlit secrets first
try:
if hasattr(st, 'secrets') and key in st.secrets:
return st.secrets[key]
    except Exception:
        # st.secrets raises if no secrets.toml exists; fall through to the environment
        pass
# Fall back to environment
return os.environ.get(key, default)
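# Example .streamlit/secrets.toml entries read by get_secret() (sketch; the
# values are placeholders -- never commit real keys):
#
#   GROQ_API_KEY = "gsk_..."
#   GOOGLE_API_KEY = "AIza..."
#
# The same names also work as plain environment variables.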
class GroqProvider:
"""
Groq - FREE tier with very fast inference.
Free tier: 14,400 requests/day, 300+ tokens/sec
Get free key: https://console.groq.com/keys
"""
API_URL = "https://api.groq.com/openai/v1/chat/completions"
MODELS = {
"llama-3.3-70b": "llama-3.3-70b-versatile",
"llama-3.1-8b": "llama-3.1-8b-instant",
"mixtral": "mixtral-8x7b-32768",
"gemma2": "gemma2-9b-it",
}
def __init__(self, api_key: Optional[str] = None):
self.api_key = api_key or get_secret("GROQ_API_KEY")
self.name = "Groq"
@property
def is_configured(self) -> bool:
return bool(self.api_key)
    def generate(self, prompt: str, model: Optional[str] = None, max_tokens: int = 1024,
                 system_prompt: Optional[str] = None) -> LLMResponse:
if not self.api_key:
return LLMResponse("", "", self.name, False, "No Groq API key")
model = model or self.MODELS["llama-3.1-8b"]
messages = []
if system_prompt:
messages.append({"role": "system", "content": system_prompt})
messages.append({"role": "user", "content": prompt})
try:
response = requests.post(
self.API_URL,
headers={
"Authorization": f"Bearer {self.api_key}",
"Content-Type": "application/json"
},
json={
"model": model,
"messages": messages,
"max_tokens": max_tokens,
"temperature": 0.7,
},
timeout=30
)
response.raise_for_status()
result = response.json()
return LLMResponse(
text=result["choices"][0]["message"]["content"],
model=model,
provider=self.name,
success=True,
usage=result.get("usage")
)
except Exception as e:
return LLMResponse("", model, self.name, False, str(e))
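# Usage sketch for GroqProvider; the chat providers below share this interface:
#
#   provider = GroqProvider()                # reads GROQ_API_KEY if none is passed
#   if provider.is_configured:
#       resp = provider.generate("Explain RAG in one sentence.",
#                                system_prompt="Be concise.")
#       print(resp.text if resp.success else resp.error)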
class GoogleGeminiProvider:
"""
Google AI Studio (Gemini) - FREE tier.
Free tier: ~15 requests/min, Gemini 2.0 Flash & 1.5 Pro
Get free key: https://aistudio.google.com/apikey
"""
API_URL = "https://generativelanguage.googleapis.com/v1beta/models"
MODELS = {
"gemini-2.0-flash": "gemini-2.0-flash-exp",
"gemini-1.5-flash": "gemini-1.5-flash",
"gemini-1.5-pro": "gemini-1.5-pro",
}
def __init__(self, api_key: Optional[str] = None):
self.api_key = api_key or get_secret("GOOGLE_API_KEY") or get_secret("GEMINI_API_KEY")
self.name = "Google Gemini"
@property
def is_configured(self) -> bool:
return bool(self.api_key)
    def generate(self, prompt: str, model: Optional[str] = None, max_tokens: int = 1024,
                 system_prompt: Optional[str] = None) -> LLMResponse:
if not self.api_key:
return LLMResponse("", "", self.name, False, "No Google API key")
model = model or self.MODELS["gemini-1.5-flash"]
        # Build contents. Gemini's `contents` array accepts only "user" and
        # "model" roles, so the system prompt is injected as a primed exchange.
        contents = []
        if system_prompt:
            contents.append({"role": "user", "parts": [{"text": system_prompt}]})
            contents.append({"role": "model", "parts": [{"text": "Understood. I will follow these instructions."}]})
        contents.append({"role": "user", "parts": [{"text": prompt}]})
try:
url = f"{self.API_URL}/{model}:generateContent?key={self.api_key}"
response = requests.post(
url,
json={
"contents": contents,
"generationConfig": {
"maxOutputTokens": max_tokens,
"temperature": 0.7,
}
},
timeout=60
)
response.raise_for_status()
result = response.json()
text = result["candidates"][0]["content"]["parts"][0]["text"]
return LLMResponse(
text=text,
model=model,
provider=self.name,
success=True
)
except Exception as e:
return LLMResponse("", model, self.name, False, str(e))
class OpenRouterProvider:
"""
OpenRouter - Access to many FREE models with single API key.
Free models include: Llama, Mistral, Gemma, and more
Get free key: https://openrouter.ai/keys
"""
API_URL = "https://openrouter.ai/api/v1/chat/completions"
# Free models on OpenRouter
MODELS = {
"llama-3.1-8b": "meta-llama/llama-3.1-8b-instruct:free",
"gemma-2-9b": "google/gemma-2-9b-it:free",
"mistral-7b": "mistralai/mistral-7b-instruct:free",
"phi-3-mini": "microsoft/phi-3-mini-128k-instruct:free",
"qwen-2-7b": "qwen/qwen-2-7b-instruct:free",
}
def __init__(self, api_key: Optional[str] = None):
self.api_key = api_key or get_secret("OPENROUTER_API_KEY")
self.name = "OpenRouter"
@property
def is_configured(self) -> bool:
return bool(self.api_key)
    def generate(self, prompt: str, model: Optional[str] = None, max_tokens: int = 1024,
                 system_prompt: Optional[str] = None) -> LLMResponse:
if not self.api_key:
return LLMResponse("", "", self.name, False, "No OpenRouter API key")
model = model or self.MODELS["llama-3.1-8b"]
messages = []
if system_prompt:
messages.append({"role": "system", "content": system_prompt})
messages.append({"role": "user", "content": prompt})
try:
response = requests.post(
self.API_URL,
headers={
"Authorization": f"Bearer {self.api_key}",
"Content-Type": "application/json",
"HTTP-Referer": "https://sparknet.streamlit.app",
"X-Title": "SPARKNET"
},
json={
"model": model,
"messages": messages,
"max_tokens": max_tokens,
},
timeout=60
)
response.raise_for_status()
result = response.json()
return LLMResponse(
text=result["choices"][0]["message"]["content"],
model=model,
provider=self.name,
success=True,
usage=result.get("usage")
)
except Exception as e:
return LLMResponse("", model, self.name, False, str(e))
class GitHubModelsProvider:
"""
GitHub Models - FREE access to top-tier models.
Free models: GPT-4o, Llama 3.1, Mistral, and more
Get token: https://github.com/settings/tokens (with 'models' scope)
"""
API_URL = "https://models.inference.ai.azure.com/chat/completions"
MODELS = {
"gpt-4o": "gpt-4o",
"gpt-4o-mini": "gpt-4o-mini",
"llama-3.1-70b": "Meta-Llama-3.1-70B-Instruct",
"llama-3.1-8b": "Meta-Llama-3.1-8B-Instruct",
"mistral-large": "Mistral-large",
}
def __init__(self, api_key: Optional[str] = None):
self.api_key = api_key or get_secret("GITHUB_TOKEN") or get_secret("GITHUB_MODELS_TOKEN")
self.name = "GitHub Models"
@property
def is_configured(self) -> bool:
return bool(self.api_key)
    def generate(self, prompt: str, model: Optional[str] = None, max_tokens: int = 1024,
                 system_prompt: Optional[str] = None) -> LLMResponse:
if not self.api_key:
return LLMResponse("", "", self.name, False, "No GitHub token")
model = model or self.MODELS["gpt-4o-mini"]
messages = []
if system_prompt:
messages.append({"role": "system", "content": system_prompt})
messages.append({"role": "user", "content": prompt})
try:
response = requests.post(
self.API_URL,
headers={
"Authorization": f"Bearer {self.api_key}",
"Content-Type": "application/json"
},
json={
"model": model,
"messages": messages,
"max_tokens": max_tokens,
},
timeout=60
)
response.raise_for_status()
result = response.json()
return LLMResponse(
text=result["choices"][0]["message"]["content"],
model=model,
provider=self.name,
success=True,
usage=result.get("usage")
)
except Exception as e:
return LLMResponse("", model, self.name, False, str(e))
class HuggingFaceProvider:
"""
HuggingFace Inference API - FREE access to thousands of models.
Get free token: https://huggingface.co/settings/tokens
"""
API_URL = "https://api-inference.huggingface.co/models/"
MODELS = {
"zephyr-7b": "HuggingFaceH4/zephyr-7b-beta",
"mistral-7b": "mistralai/Mistral-7B-Instruct-v0.2",
"llama-2-7b": "meta-llama/Llama-2-7b-chat-hf",
"flan-t5": "google/flan-t5-large",
"embed": "sentence-transformers/all-MiniLM-L6-v2",
}
def __init__(self, api_key: Optional[str] = None):
self.api_key = api_key or get_secret("HF_TOKEN") or get_secret("HUGGINGFACE_TOKEN")
self.name = "HuggingFace"
@property
def is_configured(self) -> bool:
return bool(self.api_key)
    def generate(self, prompt: str, model: Optional[str] = None, max_tokens: int = 500,
                 system_prompt: Optional[str] = None) -> LLMResponse:
model = model or self.MODELS["zephyr-7b"]
url = f"{self.API_URL}{model}"
# Format prompt with system instruction
full_prompt = prompt
if system_prompt:
full_prompt = f"{system_prompt}\n\nUser: {prompt}\nAssistant:"
headers = {"Content-Type": "application/json"}
if self.api_key:
headers["Authorization"] = f"Bearer {self.api_key}"
try:
response = requests.post(
url,
headers=headers,
json={
"inputs": full_prompt,
"parameters": {
"max_new_tokens": max_tokens,
"temperature": 0.7,
"do_sample": True,
"return_full_text": False,
},
"options": {"wait_for_model": True}
},
timeout=120
)
if response.status_code == 503:
return LLMResponse("", model, self.name, False, "Model is loading, try again")
response.raise_for_status()
result = response.json()
if isinstance(result, list) and len(result) > 0:
text = result[0].get("generated_text", "")
else:
text = str(result)
return LLMResponse(text=text, model=model, provider=self.name, success=True)
except Exception as e:
return LLMResponse("", model, self.name, False, str(e))
def embed(self, texts: List[str], model: Optional[str] = None) -> Tuple[List[List[float]], Optional[str]]:
"""Generate embeddings."""
model = model or self.MODELS["embed"]
url = f"{self.API_URL}{model}"
headers = {"Content-Type": "application/json"}
if self.api_key:
headers["Authorization"] = f"Bearer {self.api_key}"
try:
response = requests.post(
url,
headers=headers,
json={"inputs": texts, "options": {"wait_for_model": True}},
timeout=60
)
response.raise_for_status()
return response.json(), None
except Exception as e:
return [], str(e)
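# Embedding usage sketch: the feature-extraction endpoint typically returns one
# vector per input text (384 dimensions for all-MiniLM-L6-v2):
#
#   vectors, err = HuggingFaceProvider().embed(["hello world"])
#   if not err:
#       assert len(vectors) == 1 and len(vectors[0]) == 384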
class TogetherAIProvider:
"""
Together AI - $25 FREE credits.
Access to Llama, Mistral, and many other models
Get free credits: https://www.together.ai/
"""
API_URL = "https://api.together.xyz/v1/chat/completions"
MODELS = {
"llama-3.1-8b": "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
"llama-3.1-70b": "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
"mistral-7b": "mistralai/Mistral-7B-Instruct-v0.3",
"qwen-2-72b": "Qwen/Qwen2-72B-Instruct",
}
def __init__(self, api_key: Optional[str] = None):
self.api_key = api_key or get_secret("TOGETHER_API_KEY")
self.name = "Together AI"
@property
def is_configured(self) -> bool:
return bool(self.api_key)
    def generate(self, prompt: str, model: Optional[str] = None, max_tokens: int = 1024,
                 system_prompt: Optional[str] = None) -> LLMResponse:
if not self.api_key:
return LLMResponse("", "", self.name, False, "No Together AI API key")
model = model or self.MODELS["llama-3.1-8b"]
messages = []
if system_prompt:
messages.append({"role": "system", "content": system_prompt})
messages.append({"role": "user", "content": prompt})
try:
response = requests.post(
self.API_URL,
headers={
"Authorization": f"Bearer {self.api_key}",
"Content-Type": "application/json"
},
json={
"model": model,
"messages": messages,
"max_tokens": max_tokens,
"temperature": 0.7,
},
timeout=60
)
response.raise_for_status()
result = response.json()
return LLMResponse(
text=result["choices"][0]["message"]["content"],
model=model,
provider=self.name,
success=True,
usage=result.get("usage")
)
except Exception as e:
return LLMResponse("", model, self.name, False, str(e))
class MistralAIProvider:
"""
Mistral AI - FREE "Experiment" plan.
Get free access: https://console.mistral.ai/
"""
API_URL = "https://api.mistral.ai/v1/chat/completions"
MODELS = {
"mistral-small": "mistral-small-latest",
"mistral-medium": "mistral-medium-latest",
"mistral-large": "mistral-large-latest",
"codestral": "codestral-latest",
}
def __init__(self, api_key: Optional[str] = None):
self.api_key = api_key or get_secret("MISTRAL_API_KEY")
self.name = "Mistral AI"
@property
def is_configured(self) -> bool:
return bool(self.api_key)
    def generate(self, prompt: str, model: Optional[str] = None, max_tokens: int = 1024,
                 system_prompt: Optional[str] = None) -> LLMResponse:
if not self.api_key:
return LLMResponse("", "", self.name, False, "No Mistral API key")
model = model or self.MODELS["mistral-small"]
messages = []
if system_prompt:
messages.append({"role": "system", "content": system_prompt})
messages.append({"role": "user", "content": prompt})
try:
response = requests.post(
self.API_URL,
headers={
"Authorization": f"Bearer {self.api_key}",
"Content-Type": "application/json"
},
json={
"model": model,
"messages": messages,
"max_tokens": max_tokens,
},
timeout=60
)
response.raise_for_status()
result = response.json()
return LLMResponse(
text=result["choices"][0]["message"]["content"],
model=model,
provider=self.name,
success=True,
usage=result.get("usage")
)
except Exception as e:
return LLMResponse("", model, self.name, False, str(e))
class OfflineProvider:
"""
Offline/Demo mode - No API required.
Provides extractive responses from context for demonstration.
"""
def __init__(self):
self.name = "Offline"
@property
def is_configured(self) -> bool:
return True
def generate(self, prompt: str, context: str = "", **kwargs) -> LLMResponse:
if context:
sentences = [s.strip() for s in context.split('.') if len(s.strip()) > 20][:3]
if sentences:
response = f"Based on the documents: {sentences[0]}."
if len(sentences) > 1:
response += f" Additionally, {sentences[1].lower()}."
else:
response = "I found relevant information but cannot generate a detailed response in offline mode."
else:
response = ("I'm running in offline demo mode. Configure a free LLM provider "
"(Groq, Gemini, OpenRouter, etc.) for AI-powered responses.")
return LLMResponse(text=response, model="offline", provider=self.name, success=True)
    def embed(self, texts: List[str]) -> Tuple[List[List[float]], Optional[str]]:
        """Generate deterministic hash-based pseudo-embeddings for demo use."""
        import hashlib
        embeddings = []
        for text in texts:
            # Repeat the 32-byte SHA-256 digest to fill a 384-dim vector of
            # values in [-1.0, 1.0).
            hash_bytes = hashlib.sha256(text.encode()).digest()
            embedding = [((b % 200) - 100) / 100.0 for b in (hash_bytes * 12)][:384]
            embeddings.append(embedding)
        return embeddings, None
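# Note: the offline pseudo-embeddings above are deterministic (identical texts
# map to identical vectors) but carry no semantic meaning; they exist only so
# the vector pipeline can be exercised without any API key.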
class UnifiedLLMProvider:
"""
Unified interface for all LLM providers.
Automatically selects the best available provider based on configured API keys.
Priority: Groq > Gemini > OpenRouter > GitHub > Together > Mistral > HuggingFace > Offline
"""
def __init__(self):
self.providers: Dict[str, Any] = {}
self.active_provider: Optional[str] = None
self.active_embed_provider: Optional[str] = None
self._init_providers()
def _init_providers(self):
"""Initialize all available providers."""
# Initialize providers in priority order
provider_classes = [
("groq", GroqProvider),
("gemini", GoogleGeminiProvider),
("openrouter", OpenRouterProvider),
("github", GitHubModelsProvider),
("together", TogetherAIProvider),
("mistral", MistralAIProvider),
("huggingface", HuggingFaceProvider),
("offline", OfflineProvider),
]
for name, cls in provider_classes:
try:
provider = cls()
self.providers[name] = provider
# Set active provider (first configured one)
if provider.is_configured and not self.active_provider and name != "offline":
self.active_provider = name
logger.info(f"Active LLM provider: {provider.name}")
except Exception as e:
logger.warning(f"Failed to init {name}: {e}")
# Fallback to offline if nothing configured
if not self.active_provider:
self.active_provider = "offline"
logger.warning("No LLM API configured, using offline mode")
# HuggingFace for embeddings (works without token too)
self.active_embed_provider = "huggingface"
    def generate(self, prompt: str, provider: Optional[str] = None, **kwargs) -> LLMResponse:
"""Generate text using specified or best available provider."""
provider_name = provider or self.active_provider
if provider_name and provider_name in self.providers:
response = self.providers[provider_name].generate(prompt, **kwargs)
if response.success:
return response
logger.warning(f"{provider_name} failed: {response.error}")
        # Fallback chain: try a fixed subset of free-tier providers, ending in offline
for name in ["groq", "gemini", "openrouter", "huggingface", "offline"]:
if name in self.providers and name != provider_name:
response = self.providers[name].generate(prompt, **kwargs)
if response.success:
return response
return self.providers["offline"].generate(prompt, **kwargs)
def embed(self, texts: List[str]) -> Tuple[List[List[float]], Optional[str]]:
"""Generate embeddings."""
if self.active_embed_provider and self.active_embed_provider in self.providers:
provider = self.providers[self.active_embed_provider]
if hasattr(provider, 'embed'):
result, error = provider.embed(texts)
if not error:
return result, None
# Fallback to offline
return self.providers["offline"].embed(texts)
def get_status(self) -> Dict[str, Any]:
"""Get status of all providers."""
status = {
"active_llm": self.active_provider,
"active_llm_name": self.providers[self.active_provider].name if self.active_provider else "None",
"active_embed": self.active_embed_provider,
"providers": {}
}
for name, provider in self.providers.items():
status["providers"][name] = {
"name": provider.name,
"configured": provider.is_configured,
}
return status
def list_available(self) -> List[str]:
"""List all configured providers."""
return [name for name, p in self.providers.items() if p.is_configured and name != "offline"]
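# Inspection sketch:
#
#   llm = UnifiedLLMProvider()
#   llm.get_status()["active_llm"]            # e.g. "groq" or "offline"
#   llm.list_available()                      # configured cloud providers only
#   llm.generate("Hello", provider="gemini")  # force a provider; auto-fallback on failure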
# Global instance
_llm_provider: Optional[UnifiedLLMProvider] = None
def get_llm_provider() -> UnifiedLLMProvider:
"""Get or create the unified LLM provider."""
global _llm_provider
if _llm_provider is None:
_llm_provider = UnifiedLLMProvider()
return _llm_provider
def generate_response(prompt: str, context: str = "", system_prompt: Optional[str] = None) -> Tuple[str, Optional[str]]:
"""
Convenience function to generate a response.
Args:
prompt: User prompt
context: Optional context from retrieved documents
system_prompt: Optional system instruction
Returns:
Tuple of (response_text, error_message)
"""
provider = get_llm_provider()
# Build full prompt with context
if context:
full_prompt = f"""Context from documents:
{context}
Question: {prompt}
Please answer based on the context provided. If the answer is not in the context, say so."""
else:
full_prompt = prompt
if not system_prompt:
system_prompt = "You are a helpful document analysis assistant. Provide accurate, concise answers based on the provided context."
response = provider.generate(full_prompt, system_prompt=system_prompt)
if response.success:
return response.text, None
else:
return "", response.error