Spaces:
Running
Running
Pulastya B
feat: Initial commit - Data Science Agent with React frontend and FastAPI backend
226ac39
| """ | |
| Reasoning Module - Core Abstraction | |
| Provides clean separation between: | |
| - Deterministic data processing (tools) | |
| - Non-deterministic reasoning (LLM) | |
| Design Principles: | |
| - NO RAW DATA ACCESS - Only summaries/metadata | |
| - NO TRAINING DECISIONS - Only explanations | |
| - STRUCTURED I/O - JSON in, JSON + text out | |
| - CACHEABLE - Deterministic enough to cache | |
| - REASONING ONLY - No execution, no side effects | |
| Architecture: | |
| Tool → Generates Summary → Reasoning Module → Returns Explanation | |
| Tool: "Here's what I found: {stats}" | |
| Reasoning: "Based on these stats, this means..." | |
| Usage: | |
| from reasoning import get_reasoner | |
| reasoner = get_reasoner() | |
| result = reasoner.explain_data( | |
| summary={"rows": 1000, "columns": 20, "missing": 50} | |
| ) | |
| """ | |
| import os | |
| from typing import Dict, Any, Optional, Union | |
| from abc import ABC, abstractmethod | |
class ReasoningBackend(ABC):
    """Abstract base class for reasoning backends.

    Concrete backends (e.g. Gemini, Groq) must implement both methods.
    Methods are marked @abstractmethod so that an incomplete subclass —
    or this base class itself — cannot be instantiated; previously both
    methods were silent `pass` stubs that would return None if a subclass
    forgot to override them.
    """

    @abstractmethod
    def generate(
        self,
        prompt: str,
        system_prompt: Optional[str] = None,
        temperature: float = 0.1,
        max_tokens: int = 2048
    ) -> str:
        """Generate a natural-language reasoning response.

        Args:
            prompt: User prompt.
            system_prompt: Optional system context for the model.
            temperature: Sampling temperature (0.0 = deterministic).
            max_tokens: Maximum number of tokens to generate.

        Returns:
            The model's text response.
        """
        raise NotImplementedError

    @abstractmethod
    def generate_structured(
        self,
        prompt: str,
        schema: Dict[str, Any],
        system_prompt: Optional[str] = None
    ) -> Dict[str, Any]:
        """Generate a structured JSON response.

        Args:
            prompt: User prompt.
            schema: Expected JSON schema describing the response shape.
            system_prompt: Optional system context for the model.

        Returns:
            Parsed JSON response as a dict.
        """
        raise NotImplementedError
class GeminiBackend(ReasoningBackend):
    """Reasoning backend backed by Google's Gemini models."""

    def __init__(self, api_key: Optional[str] = None, model: str = "gemini-2.0-flash-exp"):
        """Configure the Gemini client.

        Args:
            api_key: Google API key; falls back to the GOOGLE_API_KEY env var.
            model: Gemini model identifier.

        Raises:
            ImportError: If the google-generativeai package is not installed.
            ValueError: If no API key can be resolved.
        """
        try:
            import google.generativeai as genai
        except ImportError:
            raise ImportError(
                "google-generativeai not installed. "
                "Install with: pip install google-generativeai"
            )

        key = api_key or os.getenv("GOOGLE_API_KEY")
        if not key:
            raise ValueError(
                "Google API key required. Set GOOGLE_API_KEY env var or pass api_key"
            )

        genai.configure(api_key=key)
        self.model = genai.GenerativeModel(
            model,
            generation_config={"temperature": 0.1}
        )
        self.model_name = model

    def generate(
        self,
        prompt: str,
        system_prompt: Optional[str] = None,
        temperature: float = 0.1,
        max_tokens: int = 2048
    ) -> str:
        """Produce a free-form reasoning response from Gemini."""
        # Gemini takes a single prompt string, so fold the system context in.
        composed = f"{system_prompt}\n\n{prompt}" if system_prompt else prompt
        reply = self.model.generate_content(
            composed,
            generation_config={
                "temperature": temperature,
                "max_output_tokens": max_tokens
            }
        )
        return reply.text

    def generate_structured(
        self,
        prompt: str,
        schema: Dict[str, Any],
        system_prompt: Optional[str] = None
    ) -> Dict[str, Any]:
        """Produce a JSON response conforming to *schema*."""
        import json

        schema_str = json.dumps(schema, indent=2)
        structured_prompt = f"""{prompt}
Respond with valid JSON matching this schema:
{schema_str}
Your response must be valid JSON only, no other text."""
        raw = self.generate(structured_prompt, system_prompt)
        return self._parse_json_reply(raw)

    @staticmethod
    def _parse_json_reply(text: str) -> Dict[str, Any]:
        """Parse *text* as JSON, tolerating markdown fences and surrounding prose.

        Raises:
            ValueError: If no JSON object can be located in the reply.
        """
        import json
        import re

        # First attempt: the whole reply is already valid JSON.
        try:
            return json.loads(text)
        except json.JSONDecodeError:
            pass
        # Fallback 1: a ```json fenced code block.
        fenced = re.search(r'```json\s*\n(.*?)\n```', text, re.DOTALL)
        if fenced:
            return json.loads(fenced.group(1))
        # Fallback 2: any brace-delimited span.
        braced = re.search(r'\{.*\}', text, re.DOTALL)
        if braced:
            return json.loads(braced.group(0))
        raise ValueError(f"Failed to extract JSON from response: {text[:200]}...")
class GroqBackend(ReasoningBackend):
    """Reasoning backend backed by Groq-hosted chat models."""

    def __init__(self, api_key: Optional[str] = None, model: str = "llama-3.3-70b-versatile"):
        """Configure the Groq client.

        Args:
            api_key: Groq API key; falls back to the GROQ_API_KEY env var.
            model: Groq model identifier.

        Raises:
            ImportError: If the groq package is not installed.
            ValueError: If no API key can be resolved.
        """
        try:
            from groq import Groq
        except ImportError:
            raise ImportError(
                "groq not installed. "
                "Install with: pip install groq"
            )

        key = api_key or os.getenv("GROQ_API_KEY")
        if not key:
            raise ValueError(
                "Groq API key required. Set GROQ_API_KEY env var or pass api_key"
            )

        self.client = Groq(api_key=key)
        self.model_name = model

    def generate(
        self,
        prompt: str,
        system_prompt: Optional[str] = None,
        temperature: float = 0.1,
        max_tokens: int = 2048
    ) -> str:
        """Produce a free-form reasoning response from Groq."""
        # Groq uses OpenAI-style chat messages; system context is its own turn.
        chat = (
            [{"role": "system", "content": system_prompt}] if system_prompt else []
        )
        chat.append({"role": "user", "content": prompt})
        completion = self.client.chat.completions.create(
            model=self.model_name,
            messages=chat,
            temperature=temperature,
            max_tokens=max_tokens
        )
        return completion.choices[0].message.content

    def generate_structured(
        self,
        prompt: str,
        schema: Dict[str, Any],
        system_prompt: Optional[str] = None
    ) -> Dict[str, Any]:
        """Produce a JSON response conforming to *schema*."""
        import json

        schema_str = json.dumps(schema, indent=2)
        structured_prompt = f"""{prompt}
Respond with valid JSON matching this schema:
{schema_str}
Your response must be valid JSON only, no other text."""
        raw = self.generate(structured_prompt, system_prompt)
        return self._parse_json_reply(raw)

    @staticmethod
    def _parse_json_reply(text: str) -> Dict[str, Any]:
        """Parse *text* as JSON, tolerating markdown fences and surrounding prose.

        Raises:
            ValueError: If no JSON object can be located in the reply.
        """
        import json
        import re

        # First attempt: the whole reply is already valid JSON.
        try:
            return json.loads(text)
        except json.JSONDecodeError:
            pass
        # Fallback 1: a ```json fenced code block.
        fenced = re.search(r'```json\s*\n(.*?)\n```', text, re.DOTALL)
        if fenced:
            return json.loads(fenced.group(1))
        # Fallback 2: any brace-delimited span.
        braced = re.search(r'\{.*\}', text, re.DOTALL)
        if braced:
            return json.loads(braced.group(0))
        raise ValueError(f"Failed to extract JSON from response: {text[:200]}...")
class ReasoningEngine:
    """
    Main reasoning engine.

    Delegates to appropriate backend (Gemini, Groq, etc).
    Provides high-level reasoning capabilities.
    """

    def __init__(
        self,
        backend: Optional[ReasoningBackend] = None,
        provider: Optional[str] = None
    ):
        """
        Initialize reasoning engine.

        Args:
            backend: Custom backend instance (takes precedence over provider).
            provider: 'gemini' or 'groq'. If omitted, the LLM_PROVIDER env
                var is consulted, defaulting to 'gemini'.

        Raises:
            ValueError: If the resolved provider is not supported.
        """
        # Bug fix: the default used to be the truthy string "gemini", which
        # made `provider or os.getenv(...)` dead code — LLM_PROVIDER was
        # never consulted. Defaulting to None restores the documented
        # env-var fallback while keeping explicit callers unchanged.
        if backend:
            self.backend = backend
        else:
            provider = provider or os.getenv("LLM_PROVIDER", "gemini")
            if provider == "gemini":
                self.backend = GeminiBackend()
            elif provider == "groq":
                self.backend = GroqBackend()
            else:
                raise ValueError(f"Unsupported provider: {provider}")
        # NOTE(review): with a custom backend this records whatever provider
        # label the caller supplied (possibly None) rather than guessing.
        self.provider = provider

    def reason(
        self,
        prompt: str,
        system_prompt: Optional[str] = None,
        temperature: float = 0.1
    ) -> str:
        """
        General-purpose reasoning.

        Args:
            prompt: User prompt
            system_prompt: Optional system context
            temperature: Creativity (0.0 = deterministic, 1.0 = creative)

        Returns:
            Natural language response
        """
        return self.backend.generate(prompt, system_prompt, temperature)

    def reason_structured(
        self,
        prompt: str,
        schema: Dict[str, Any],
        system_prompt: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Structured reasoning with JSON output.

        Args:
            prompt: User prompt
            schema: Expected JSON schema
            system_prompt: Optional system context

        Returns:
            Parsed JSON response
        """
        return self.backend.generate_structured(prompt, schema, system_prompt)
# Module-level singleton, managed by get_reasoner()/reset_reasoner().
_reasoning_engine: Optional[ReasoningEngine] = None


def get_reasoner(
    backend: Optional[ReasoningBackend] = None,
    provider: Optional[str] = None
) -> ReasoningEngine:
    """
    Get the singleton reasoning engine, creating it on first use.

    Args:
        backend: Custom backend instance; always forces a rebuild.
        provider: 'gemini' or 'groq'. Requesting a provider different from
            the cached engine's also forces a rebuild.

    Returns:
        ReasoningEngine instance
    """
    global _reasoning_engine
    rebuild = (
        _reasoning_engine is None
        or backend is not None
        # Bug fix: previously a cached engine was returned even when the
        # caller explicitly asked for a *different* provider, so e.g.
        # get_reasoner(provider="groq") after a gemini engine existed
        # silently returned the gemini engine.
        or (provider is not None and provider != _reasoning_engine.provider)
    )
    if rebuild:
        _reasoning_engine = ReasoningEngine(backend=backend, provider=provider)
    return _reasoning_engine
def reset_reasoner():
    """Clear the cached singleton engine (primarily for test isolation)."""
    global _reasoning_engine
    _reasoning_engine = None