""" Free LLM Providers for SPARKNET Supports multiple FREE-tier LLM providers: 1. Groq - Very fast, generous free tier (14,400 req/day) 2. Google Gemini - 15 req/min free 3. OpenRouter - Access to many free models 4. GitHub Models - Free GPT-4o, Llama access 5. HuggingFace Inference API - Thousands of free models 6. Together AI - $25 free credits 7. Mistral AI - Free experiment plan 8. Offline mode - No API required SECURITY & PRIVACY CONSIDERATIONS ================================== GDPR COMPLIANCE: - Cloud LLM providers may process data outside the EU - For GDPR-sensitive workloads, use: 1. Offline mode with local Ollama 2. EU-hosted providers (when available) 3. Data anonymization before API calls - Consider data processing agreements with LLM providers - Implement data minimization - only send necessary context DATA ISOLATION OPTIONS: 1. FULLY LOCAL (Maximum Privacy): - Use Ollama for 100% on-premise inference - No data transmitted to external services - Configure: set no cloud API keys, system uses offline mode 2. HYBRID (Balanced): - Use local Ollama for sensitive documents - Use cloud LLMs for general queries - Implement document classification for routing 3. CLOUD-ONLY (Convenience): - All inference via cloud providers - Suitable for non-sensitive/public data - Review provider privacy policies PRIVATE DEPLOYMENT NOTES: - For enterprise deployments, configure Ollama on internal network - Use VPN/private endpoints for database connections - Enable audit logging for all LLM interactions - Implement rate limiting and access controls STREAMLIT CLOUD DEPLOYMENT: - Store API keys in Streamlit secrets (secrets.toml) - Never commit secrets to version control - Use environment variables as fallback - Enable session-based authentication Author: SPARKNET Team Project: VISTA/Horizon EU """ import os import requests from typing import Optional, Tuple, List, Dict, Any from dataclasses import dataclass from loguru import logger import streamlit as st @dataclass class LLMResponse: text: str model: str provider: str success: bool error: Optional[str] = None usage: Optional[Dict[str, int]] = None def get_secret(key: str, default: str = None) -> Optional[str]: """Get secret from Streamlit secrets or environment.""" # Try Streamlit secrets first try: if hasattr(st, 'secrets') and key in st.secrets: return st.secrets[key] except: pass # Fall back to environment return os.environ.get(key, default) class GroqProvider: """ Groq - FREE tier with very fast inference. Free tier: 14,400 requests/day, 300+ tokens/sec Get free key: https://console.groq.com/keys """ API_URL = "https://api.groq.com/openai/v1/chat/completions" MODELS = { "llama-3.3-70b": "llama-3.3-70b-versatile", "llama-3.1-8b": "llama-3.1-8b-instant", "mixtral": "mixtral-8x7b-32768", "gemma2": "gemma2-9b-it", } def __init__(self, api_key: Optional[str] = None): self.api_key = api_key or get_secret("GROQ_API_KEY") self.name = "Groq" @property def is_configured(self) -> bool: return bool(self.api_key) def generate(self, prompt: str, model: Optional[str] = None, max_tokens: int = 1024, system_prompt: str = None) -> LLMResponse: if not self.api_key: return LLMResponse("", "", self.name, False, "No Groq API key") model = model or self.MODELS["llama-3.1-8b"] messages = [] if system_prompt: messages.append({"role": "system", "content": system_prompt}) messages.append({"role": "user", "content": prompt}) try: response = requests.post( self.API_URL, headers={ "Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json" }, json={ "model": model, "messages": messages, "max_tokens": max_tokens, "temperature": 0.7, }, timeout=30 ) response.raise_for_status() result = response.json() return LLMResponse( text=result["choices"][0]["message"]["content"], model=model, provider=self.name, success=True, usage=result.get("usage") ) except Exception as e: return LLMResponse("", model, self.name, False, str(e)) class GoogleGeminiProvider: """ Google AI Studio (Gemini) - FREE tier. Free tier: ~15 requests/min, Gemini 2.0 Flash & 1.5 Pro Get free key: https://aistudio.google.com/apikey """ API_URL = "https://generativelanguage.googleapis.com/v1beta/models" MODELS = { "gemini-2.0-flash": "gemini-2.0-flash-exp", "gemini-1.5-flash": "gemini-1.5-flash", "gemini-1.5-pro": "gemini-1.5-pro", } def __init__(self, api_key: Optional[str] = None): self.api_key = api_key or get_secret("GOOGLE_API_KEY") or get_secret("GEMINI_API_KEY") self.name = "Google Gemini" @property def is_configured(self) -> bool: return bool(self.api_key) def generate(self, prompt: str, model: Optional[str] = None, max_tokens: int = 1024, system_prompt: str = None) -> LLMResponse: if not self.api_key: return LLMResponse("", "", self.name, False, "No Google API key") model = model or self.MODELS["gemini-1.5-flash"] # Build content contents = [] if system_prompt: contents.append({"role": "user", "parts": [{"text": system_prompt}]}) contents.append({"role": "model", "parts": [{"text": "Understood. I will follow these instructions."}]}) contents.append({"role": "user", "parts": [{"text": prompt}]}) try: url = f"{self.API_URL}/{model}:generateContent?key={self.api_key}" response = requests.post( url, json={ "contents": contents, "generationConfig": { "maxOutputTokens": max_tokens, "temperature": 0.7, } }, timeout=60 ) response.raise_for_status() result = response.json() text = result["candidates"][0]["content"]["parts"][0]["text"] return LLMResponse( text=text, model=model, provider=self.name, success=True ) except Exception as e: return LLMResponse("", model, self.name, False, str(e)) class OpenRouterProvider: """ OpenRouter - Access to many FREE models with single API key. Free models include: Llama, Mistral, Gemma, and more Get free key: https://openrouter.ai/keys """ API_URL = "https://openrouter.ai/api/v1/chat/completions" # Free models on OpenRouter MODELS = { "llama-3.1-8b": "meta-llama/llama-3.1-8b-instruct:free", "gemma-2-9b": "google/gemma-2-9b-it:free", "mistral-7b": "mistralai/mistral-7b-instruct:free", "phi-3-mini": "microsoft/phi-3-mini-128k-instruct:free", "qwen-2-7b": "qwen/qwen-2-7b-instruct:free", } def __init__(self, api_key: Optional[str] = None): self.api_key = api_key or get_secret("OPENROUTER_API_KEY") self.name = "OpenRouter" @property def is_configured(self) -> bool: return bool(self.api_key) def generate(self, prompt: str, model: Optional[str] = None, max_tokens: int = 1024, system_prompt: str = None) -> LLMResponse: if not self.api_key: return LLMResponse("", "", self.name, False, "No OpenRouter API key") model = model or self.MODELS["llama-3.1-8b"] messages = [] if system_prompt: messages.append({"role": "system", "content": system_prompt}) messages.append({"role": "user", "content": prompt}) try: response = requests.post( self.API_URL, headers={ "Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json", "HTTP-Referer": "https://sparknet.streamlit.app", "X-Title": "SPARKNET" }, json={ "model": model, "messages": messages, "max_tokens": max_tokens, }, timeout=60 ) response.raise_for_status() result = response.json() return LLMResponse( text=result["choices"][0]["message"]["content"], model=model, provider=self.name, success=True, usage=result.get("usage") ) except Exception as e: return LLMResponse("", model, self.name, False, str(e)) class GitHubModelsProvider: """ GitHub Models - FREE access to top-tier models. Free models: GPT-4o, Llama 3.1, Mistral, and more Get token: https://github.com/settings/tokens (with 'models' scope) """ API_URL = "https://models.inference.ai.azure.com/chat/completions" MODELS = { "gpt-4o": "gpt-4o", "gpt-4o-mini": "gpt-4o-mini", "llama-3.1-70b": "Meta-Llama-3.1-70B-Instruct", "llama-3.1-8b": "Meta-Llama-3.1-8B-Instruct", "mistral-large": "Mistral-large", } def __init__(self, api_key: Optional[str] = None): self.api_key = api_key or get_secret("GITHUB_TOKEN") or get_secret("GITHUB_MODELS_TOKEN") self.name = "GitHub Models" @property def is_configured(self) -> bool: return bool(self.api_key) def generate(self, prompt: str, model: Optional[str] = None, max_tokens: int = 1024, system_prompt: str = None) -> LLMResponse: if not self.api_key: return LLMResponse("", "", self.name, False, "No GitHub token") model = model or self.MODELS["gpt-4o-mini"] messages = [] if system_prompt: messages.append({"role": "system", "content": system_prompt}) messages.append({"role": "user", "content": prompt}) try: response = requests.post( self.API_URL, headers={ "Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json" }, json={ "model": model, "messages": messages, "max_tokens": max_tokens, }, timeout=60 ) response.raise_for_status() result = response.json() return LLMResponse( text=result["choices"][0]["message"]["content"], model=model, provider=self.name, success=True, usage=result.get("usage") ) except Exception as e: return LLMResponse("", model, self.name, False, str(e)) class HuggingFaceProvider: """ HuggingFace Inference API - FREE access to thousands of models. Get free token: https://huggingface.co/settings/tokens """ API_URL = "https://api-inference.huggingface.co/models/" MODELS = { "zephyr-7b": "HuggingFaceH4/zephyr-7b-beta", "mistral-7b": "mistralai/Mistral-7B-Instruct-v0.2", "llama-2-7b": "meta-llama/Llama-2-7b-chat-hf", "flan-t5": "google/flan-t5-large", "embed": "sentence-transformers/all-MiniLM-L6-v2", } def __init__(self, api_key: Optional[str] = None): self.api_key = api_key or get_secret("HF_TOKEN") or get_secret("HUGGINGFACE_TOKEN") self.name = "HuggingFace" @property def is_configured(self) -> bool: return bool(self.api_key) def generate(self, prompt: str, model: Optional[str] = None, max_tokens: int = 500, system_prompt: str = None) -> LLMResponse: model = model or self.MODELS["zephyr-7b"] url = f"{self.API_URL}{model}" # Format prompt with system instruction full_prompt = prompt if system_prompt: full_prompt = f"{system_prompt}\n\nUser: {prompt}\nAssistant:" headers = {"Content-Type": "application/json"} if self.api_key: headers["Authorization"] = f"Bearer {self.api_key}" try: response = requests.post( url, headers=headers, json={ "inputs": full_prompt, "parameters": { "max_new_tokens": max_tokens, "temperature": 0.7, "do_sample": True, "return_full_text": False, }, "options": {"wait_for_model": True} }, timeout=120 ) if response.status_code == 503: return LLMResponse("", model, self.name, False, "Model is loading, try again") response.raise_for_status() result = response.json() if isinstance(result, list) and len(result) > 0: text = result[0].get("generated_text", "") else: text = str(result) return LLMResponse(text=text, model=model, provider=self.name, success=True) except Exception as e: return LLMResponse("", model, self.name, False, str(e)) def embed(self, texts: List[str], model: Optional[str] = None) -> Tuple[List[List[float]], Optional[str]]: """Generate embeddings.""" model = model or self.MODELS["embed"] url = f"{self.API_URL}{model}" headers = {"Content-Type": "application/json"} if self.api_key: headers["Authorization"] = f"Bearer {self.api_key}" try: response = requests.post( url, headers=headers, json={"inputs": texts, "options": {"wait_for_model": True}}, timeout=60 ) response.raise_for_status() return response.json(), None except Exception as e: return [], str(e) class TogetherAIProvider: """ Together AI - $25 FREE credits. Access to Llama, Mistral, and many other models Get free credits: https://www.together.ai/ """ API_URL = "https://api.together.xyz/v1/chat/completions" MODELS = { "llama-3.1-8b": "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", "llama-3.1-70b": "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo", "mistral-7b": "mistralai/Mistral-7B-Instruct-v0.3", "qwen-2-72b": "Qwen/Qwen2-72B-Instruct", } def __init__(self, api_key: Optional[str] = None): self.api_key = api_key or get_secret("TOGETHER_API_KEY") self.name = "Together AI" @property def is_configured(self) -> bool: return bool(self.api_key) def generate(self, prompt: str, model: Optional[str] = None, max_tokens: int = 1024, system_prompt: str = None) -> LLMResponse: if not self.api_key: return LLMResponse("", "", self.name, False, "No Together AI API key") model = model or self.MODELS["llama-3.1-8b"] messages = [] if system_prompt: messages.append({"role": "system", "content": system_prompt}) messages.append({"role": "user", "content": prompt}) try: response = requests.post( self.API_URL, headers={ "Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json" }, json={ "model": model, "messages": messages, "max_tokens": max_tokens, "temperature": 0.7, }, timeout=60 ) response.raise_for_status() result = response.json() return LLMResponse( text=result["choices"][0]["message"]["content"], model=model, provider=self.name, success=True, usage=result.get("usage") ) except Exception as e: return LLMResponse("", model, self.name, False, str(e)) class MistralAIProvider: """ Mistral AI - FREE "Experiment" plan. Get free access: https://console.mistral.ai/ """ API_URL = "https://api.mistral.ai/v1/chat/completions" MODELS = { "mistral-small": "mistral-small-latest", "mistral-medium": "mistral-medium-latest", "mistral-large": "mistral-large-latest", "codestral": "codestral-latest", } def __init__(self, api_key: Optional[str] = None): self.api_key = api_key or get_secret("MISTRAL_API_KEY") self.name = "Mistral AI" @property def is_configured(self) -> bool: return bool(self.api_key) def generate(self, prompt: str, model: Optional[str] = None, max_tokens: int = 1024, system_prompt: str = None) -> LLMResponse: if not self.api_key: return LLMResponse("", "", self.name, False, "No Mistral API key") model = model or self.MODELS["mistral-small"] messages = [] if system_prompt: messages.append({"role": "system", "content": system_prompt}) messages.append({"role": "user", "content": prompt}) try: response = requests.post( self.API_URL, headers={ "Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json" }, json={ "model": model, "messages": messages, "max_tokens": max_tokens, }, timeout=60 ) response.raise_for_status() result = response.json() return LLMResponse( text=result["choices"][0]["message"]["content"], model=model, provider=self.name, success=True, usage=result.get("usage") ) except Exception as e: return LLMResponse("", model, self.name, False, str(e)) class OfflineProvider: """ Offline/Demo mode - No API required. Provides extractive responses from context for demonstration. """ def __init__(self): self.name = "Offline" @property def is_configured(self) -> bool: return True def generate(self, prompt: str, context: str = "", **kwargs) -> LLMResponse: if context: sentences = [s.strip() for s in context.split('.') if len(s.strip()) > 20][:3] if sentences: response = f"Based on the documents: {sentences[0]}." if len(sentences) > 1: response += f" Additionally, {sentences[1].lower()}." else: response = "I found relevant information but cannot generate a detailed response in offline mode." else: response = ("I'm running in offline demo mode. Configure a free LLM provider " "(Groq, Gemini, OpenRouter, etc.) for AI-powered responses.") return LLMResponse(text=response, model="offline", provider=self.name, success=True) def embed(self, texts: List[str]) -> Tuple[List[List[float]], Optional[str]]: """Generate simple hash-based embeddings for demo.""" import hashlib embeddings = [] for text in texts: hash_bytes = hashlib.sha256(text.encode()).digest() embedding = [((b % 200) - 100) / 100.0 for b in (hash_bytes * 12)][:384] embeddings.append(embedding) return embeddings, None class UnifiedLLMProvider: """ Unified interface for all LLM providers. Automatically selects the best available provider based on configured API keys. Priority: Groq > Gemini > OpenRouter > GitHub > Together > Mistral > HuggingFace > Offline """ def __init__(self): self.providers: Dict[str, Any] = {} self.active_provider: Optional[str] = None self.active_embed_provider: Optional[str] = None self._init_providers() def _init_providers(self): """Initialize all available providers.""" # Initialize providers in priority order provider_classes = [ ("groq", GroqProvider), ("gemini", GoogleGeminiProvider), ("openrouter", OpenRouterProvider), ("github", GitHubModelsProvider), ("together", TogetherAIProvider), ("mistral", MistralAIProvider), ("huggingface", HuggingFaceProvider), ("offline", OfflineProvider), ] for name, cls in provider_classes: try: provider = cls() self.providers[name] = provider # Set active provider (first configured one) if provider.is_configured and not self.active_provider and name != "offline": self.active_provider = name logger.info(f"Active LLM provider: {provider.name}") except Exception as e: logger.warning(f"Failed to init {name}: {e}") # Fallback to offline if nothing configured if not self.active_provider: self.active_provider = "offline" logger.warning("No LLM API configured, using offline mode") # HuggingFace for embeddings (works without token too) self.active_embed_provider = "huggingface" def generate(self, prompt: str, provider: str = None, **kwargs) -> LLMResponse: """Generate text using specified or best available provider.""" provider_name = provider or self.active_provider if provider_name and provider_name in self.providers: response = self.providers[provider_name].generate(prompt, **kwargs) if response.success: return response logger.warning(f"{provider_name} failed: {response.error}") # Fallback chain for name in ["groq", "gemini", "openrouter", "huggingface", "offline"]: if name in self.providers and name != provider_name: response = self.providers[name].generate(prompt, **kwargs) if response.success: return response return self.providers["offline"].generate(prompt, **kwargs) def embed(self, texts: List[str]) -> Tuple[List[List[float]], Optional[str]]: """Generate embeddings.""" if self.active_embed_provider and self.active_embed_provider in self.providers: provider = self.providers[self.active_embed_provider] if hasattr(provider, 'embed'): result, error = provider.embed(texts) if not error: return result, None # Fallback to offline return self.providers["offline"].embed(texts) def get_status(self) -> Dict[str, Any]: """Get status of all providers.""" status = { "active_llm": self.active_provider, "active_llm_name": self.providers[self.active_provider].name if self.active_provider else "None", "active_embed": self.active_embed_provider, "providers": {} } for name, provider in self.providers.items(): status["providers"][name] = { "name": provider.name, "configured": provider.is_configured, } return status def list_available(self) -> List[str]: """List all configured providers.""" return [name for name, p in self.providers.items() if p.is_configured and name != "offline"] # Global instance _llm_provider: Optional[UnifiedLLMProvider] = None def get_llm_provider() -> UnifiedLLMProvider: """Get or create the unified LLM provider.""" global _llm_provider if _llm_provider is None: _llm_provider = UnifiedLLMProvider() return _llm_provider def generate_response(prompt: str, context: str = "", system_prompt: str = None) -> Tuple[str, Optional[str]]: """ Convenience function to generate a response. Args: prompt: User prompt context: Optional context from retrieved documents system_prompt: Optional system instruction Returns: Tuple of (response_text, error_message) """ provider = get_llm_provider() # Build full prompt with context if context: full_prompt = f"""Context from documents: {context} Question: {prompt} Please answer based on the context provided. If the answer is not in the context, say so.""" else: full_prompt = prompt if not system_prompt: system_prompt = "You are a helpful document analysis assistant. Provide accurate, concise answers based on the provided context." response = provider.generate(full_prompt, system_prompt=system_prompt) if response.success: return response.text, None else: return "", response.error