# SPOC_V1 / graph_config.py
# (Hugging Face viewer chrome from the original page, preserved as comments
# so the file is valid Python:)
# JatinAutonomousLabs's picture
# Update graph_config.py
# aa162f5 verified
# raw / history blame / 12 kB
"""
graph_config.py - Centralized Graph Configuration
==================================================
All configurable parameters for the graph orchestration system.
Modify these values to change graph behavior.
Author: AI Lab Team
Last Updated: 2025-10-10
Version: 3.1 - Added legacy compatibility constants
"""
import os
from typing import Any, Dict, List, Optional
# ============================================================================
# TIER SYSTEM CONFIGURATION
# ============================================================================
# Canonical tier identifiers; these are also the keys of TIER_CONFIGS.
TIER_LITE = "lite"
TIER_STANDARD = "standard"
TIER_FULL = "full"

# Per-tier settings. Keys:
#   max_cost            -- USD budget cap per task (None = unlimited)
#   max_execution_time  -- wall-clock limit, seconds
#   qa_rework_cycles    -- maximum QA-driven revision loops
#   includes_*          -- feature toggles read by the orchestrator
#   use_cases           -- human-readable examples for UI/help text
TIER_CONFIGS = {
    TIER_LITE: {
        "name": "Lite",
        "description": "Fast, basic responses for simple queries",
        "max_cost": 0.50,
        "max_execution_time": 60,  # seconds
        "qa_rework_cycles": 1,
        "includes_planning": False,
        "includes_experiments": False,
        "includes_monitoring": False,
        "use_cases": ["Quick questions", "Simple summaries", "Basic info"],
    },
    TIER_STANDARD: {
        "name": "Standard",
        "description": "Balanced quality and speed for most tasks",
        "max_cost": 5.00,
        "max_execution_time": 180,  # seconds
        "qa_rework_cycles": 2,
        "includes_planning": True,
        "includes_experiments": True,
        "includes_monitoring": False,
        "use_cases": ["Code generation", "Research", "Document creation"],
    },
    TIER_FULL: {
        "name": "Full",
        "description": "Premium quality with comprehensive refinement",
        "max_cost": None,  # Unlimited
        "max_execution_time": 900,  # seconds
        "qa_rework_cycles": 10,
        "includes_planning": True,
        "includes_experiments": True,
        "includes_monitoring": True,
        "use_cases": ["Complex projects", "Production code", "Detailed research"],
    },
}

# Tier applied when a caller does not specify one.
DEFAULT_TIER = TIER_STANDARD
# ============================================================================
# EXECUTION CONTROL
# ============================================================================
# Safety cap on how many nodes one run may traverse before being aborted.
MAX_EXECUTION_PATH_LENGTH = 150

# Per-node timeout, and whether a timed-out node may attempt recovery.
NODE_TIMEOUT_SECONDS = 60
ALLOW_TIMEOUT_RECOVERY = True

# Cost-estimation inputs: USD per 1K tokens for GPT-4o input/output, plus a
# typical tokens-per-call figure used when the real count is unknown.
GPT4O_INPUT_COST_PER_1K_TOKENS = 0.005
GPT4O_OUTPUT_COST_PER_1K_TOKENS = 0.015
AVG_TOKENS_PER_CALL = 2000

# Budget multipliers (legacy compatibility)
BUDGET_BUFFER_MULTIPLIER = 1.20
MAX_COST_MULTIPLIER = 1.20
# ============================================================================
# LANGUAGE & CODING CONFIGURATION
# ============================================================================
DEFAULT_LANGUAGE = "python"

SUPPORTED_LANGUAGES = [
    "python", "javascript", "typescript", "java",
    "go", "rust", "cpp", "csharp", "php", "ruby",
]

# Request keywords signalling the user's preferred implementation language.
LANGUAGE_PREFERENCE_KEYWORDS = {
    "python": ["python", "py", "prefer python", "use python"],
    "javascript": ["javascript", "js", "node", "prefer javascript"],
    "typescript": ["typescript", "ts", "prefer typescript"],
    "java": ["java", "prefer java"],
    "go": ["golang", "go", "prefer go"],
    "rust": ["rust", "prefer rust"],
}

# Phrases suggesting a prose/explanation response is wanted ...
NO_CODE_KEYWORDS = [
    'what is', 'what are', 'document', 'curriculum', 'explain', 'describe',
    'summarize', 'list', 'tell me about', 'report',
    'definition of', 'meaning of', 'overview of',
]

# ... versus phrases suggesting a coding task.
CODE_KEYWORDS = [
    'script', 'function', 'api', 'backend', 'frontend',
    'algorithm', 'program', 'code', 'implement',
    'build', 'create app', 'develop', 'software',
]
# ============================================================================
# RESEARCH MODE CONFIGURATION
# ============================================================================
# Phrases that route a request into research mode.
RESEARCH_KEYWORDS = [
    'research', 'analyze', 'study', 'investigate', 'explore',
    'survey', 'review', 'compare', 'evaluate', 'assess',
    'trends', 'best practices', 'state of', 'landscape',
    'comprehensive', 'detailed analysis', 'in-depth',
]

# Phrases indicating the user expects a written deliverable.
REPORT_KEYWORDS = [
    'report', 'summary', 'findings', 'document',
    'write up', 'present', 'deliverable', 'brief', 'whitepaper',
]

# Citation policy for research outputs.
REQUIRE_CITATIONS_FOR_RESEARCH = True
MIN_SOURCES_PER_RESEARCH = 5
MAX_SOURCES_PER_RESEARCH = 25
CITATION_FORMAT = "APA"

# Web-search backend settings.
WEB_SEARCH_ENABLED = True
WEB_SEARCH_API = "brave"
WEB_SEARCH_MAX_RESULTS = 25
WEB_SEARCH_TIMEOUT = 30  # seconds
# ============================================================================
# ARTIFACT TYPE CONFIGURATION
# ============================================================================
# Artifact kinds the system recognizes.
KNOWN_ARTIFACT_TYPES = {
    "notebook", "script", "repo", "word",
    "excel", "pdf", "image", "data",
}

# Request keywords mapped to each artifact kind.
ARTIFACT_TYPE_KEYWORDS = {
    'notebook': ['notebook', 'jupyter', 'ipynb', 'visualization', 'chart', 'graph'],
    'script': ['script', 'function', 'utility', 'tool'],
    'repo': ['app', 'backend', 'frontend', 'api', 'repository', 'project'],
    'word': ['document', 'report', 'research', 'analysis', 'doc', 'docx'],
    'excel': ['spreadsheet', 'excel', 'xlsx', 'csv', 'data table'],
    'pdf': ['pdf', 'printable'],
}

# Fallback artifact kinds when no keyword matches.
DEFAULT_CODE_ARTIFACT = "script"
DEFAULT_RESEARCH_ARTIFACT = "word"
# ============================================================================
# FILE & ARTIFACT MANAGEMENT
# ============================================================================
# Root output directory; override with the OUT_DIR environment variable.
OUT_DIR = os.environ.get("OUT_DIR", "outputs")
# All sub-paths are derived from OUT_DIR. (Previously every path except
# EXPORTS_DIR was hard-coded to "outputs", so setting OUT_DIR only moved
# the exports directory.) Defaults are unchanged when OUT_DIR is unset.
OUTPUTS_DIR = OUT_DIR
USER_ARTIFACTS_DIR = os.path.join(OUT_DIR, "user_artifacts")
EXPORTS_DIR = os.path.join(OUT_DIR, "exports")
FEEDBACK_STORAGE_DIR = os.path.join(OUT_DIR, "feedback")
ARTIFACT_REGISTRY_FILE = os.path.join(OUT_DIR, "artifact_registry.json")

# Export and retention policy.
ENSURE_ALL_ARTIFACTS_EXPORTED = True
ARTIFACT_RETENTION_DAYS = 30
MAX_ARTIFACT_SIZE_MB = 100

# Pieces used when composing generated filenames (short UUID suffix length,
# separator, and strftime timestamp format) — presumably assembled by the
# artifact-writing code; confirm against consumers.
UUID_LENGTH = 8
FILENAME_SEPARATOR = "_"
TIMESTAMP_FORMAT = "%Y%m%d_%H%M%S"
# ============================================================================
# CONVERSATION CONTEXT MANAGEMENT
# ============================================================================
# History window and token budget used when assembling context.
MAX_CONVERSATION_HISTORY = 25
CONTEXT_TOKEN_LIMIT = 40000
INCLUDE_ARTIFACTS_IN_CONTEXT = True

# Long conversations get auto-summarized after this many exchanges.
AUTO_SUMMARIZE_LONG_CONVERSATIONS = True
SUMMARIZE_AFTER_EXCHANGES = 5

# Cues that a new message continues the previous exchange.
FOLLOW_UP_KEYWORDS = [
    'also', 'additionally', 'now', 'then', 'next',
    'can you', 'please', 'what about', 'how about',
    'add', 'include', 'expand', 'modify', 'change',
    'update', 'improve', 'enhance', 'refine',
]
REFERENCE_PRONOUNS = ['it', 'that', 'this', 'they', 'them', 'those', 'these']

# UI strings for the context indicator.
SHOW_CONTEXT_INDICATOR = True
CONTEXT_INDICATOR_NEW = "📊 **New conversation**"
CONTEXT_INDICATOR_FOLLOW_UP = "🔄 Follow-up detected"
CONTEXT_INDICATOR_FORMAT = "💬 **Context: {count} exchange(s)**"
# ============================================================================
# EXECUTION MODES
# ============================================================================
# Mode descriptors: whether code generation, citations, and web search are
# involved, plus the output formats each mode may emit.
EXECUTION_MODES = {
    "research": {
        "description": "Factual research with citations",
        "needs_code": False,
        "requires_citations": True,
        "uses_web_search": True,
        "output_formats": ["document", "response"],
    },
    "coding": {
        "description": "Code generation and implementation",
        "needs_code": True,
        "requires_citations": False,
        "uses_web_search": False,
        "output_formats": ["script", "notebook", "repo"],
    },
    "hybrid": {
        "description": "Research with code examples",
        "needs_code": True,
        "requires_citations": True,
        "uses_web_search": True,
        "output_formats": ["document", "notebook"],
    },
    "simple_response": {
        "description": "Quick text response",
        "needs_code": False,
        "requires_citations": False,
        "uses_web_search": False,
        "output_formats": ["text"],
    },
}
# ============================================================================
# NODE STATUS MESSAGES
# ============================================================================
# Progress text shown while each graph node runs, keyed by node name.
NODE_STATUS = {
    "memory": "Retrieving context...",
    "intent": "Clarifying objective...",
    "pm": "Planning execution...",
    "pragmatist": "Assessing feasibility...",
    "experimenter": "Generating artifacts...",
    "research": "Conducting research...",
    "synthesis": "Synthesizing response...",
    "qa": "Reviewing quality...",
    "observer": "Monitoring performance...",
    "archive": "Saving to memory...",
}

# Final status lines for the three completion outcomes.
COMPLETION_SUCCESS = "✅ Task completed successfully"
COMPLETION_WITH_WARNING = "⚠️ Task completed with limitations"
COMPLETION_FAILED = "❌ Task could not be completed"
# ============================================================================
# ERROR HANDLING
# ============================================================================
# Canonical error strings surfaced to callers and logs.
ERROR_NO_LLM = "LLM not available"
ERROR_TIMEOUT = "Operation timed out"
ERROR_BUDGET_EXCEEDED = "Budget limit exceeded"
ERROR_PARSE_FAILED = "Failed to parse LLM response"
ERROR_ARTIFACT_CREATION = "Failed to create artifact"

# Retry policy for recoverable failures.
MAX_RETRIES = 3
RETRY_DELAY_SECONDS = 2
EXPONENTIAL_BACKOFF = True
# ============================================================================
# LOGGING CONFIGURATION
# ============================================================================
# Log verbosity per tier (stdlib logging level names).
TIER_LOG_LEVELS = {
    TIER_LITE: "WARNING",
    TIER_STANDARD: "INFO",
    TIER_FULL: "DEBUG",
}

# Visual separators and per-metric logging toggles.
LOG_SEPARATOR = "=" * 60
LOG_SEPARATOR_SHORT = "-" * 40
LOG_PERFORMANCE_METRICS = True
LOG_TOKEN_USAGE = True
LOG_COST_TRACKING = True
# ============================================================================
# HELPER FUNCTIONS
# ============================================================================
def get_tier_config(tier: str) -> Dict[str, Any]:
    """Return the configuration dict for *tier*, or DEFAULT_TIER's if unknown."""
    try:
        return TIER_CONFIGS[tier]
    except KeyError:
        return TIER_CONFIGS[DEFAULT_TIER]
def calculate_cost_per_call(tokens: Optional[int] = None) -> float:
    """Estimate the USD cost of a single LLM call.

    Args:
        tokens: Token count for the call. When None, falls back to
            AVG_TOKENS_PER_CALL. An explicit ``tokens=0`` now yields 0.0
            (the previous ``tokens or ...`` check treated 0 as "missing"
            and silently used the average instead).

    Returns:
        Estimated cost in USD, using the mean of the GPT-4o input and
        output per-1K-token rates.
    """
    if tokens is None:
        tokens = AVG_TOKENS_PER_CALL
    avg_cost_per_1k = (GPT4O_INPUT_COST_PER_1K_TOKENS + GPT4O_OUTPUT_COST_PER_1K_TOKENS) / 2.0
    return (tokens / 1000.0) * avg_cost_per_1k
def is_tier_unlimited(tier: str) -> bool:
    """True when *tier* has no cost cap (its ``max_cost`` is None)."""
    return get_tier_config(tier)["max_cost"] is None
def get_max_rework_cycles(tier: str) -> int:
    """Return the maximum number of QA rework cycles allowed for *tier*."""
    return get_tier_config(tier)["qa_rework_cycles"]
def should_include_monitoring(tier: str) -> bool:
    """Return whether the monitoring/observer feature is enabled for *tier*."""
    return get_tier_config(tier)["includes_monitoring"]
def validate_tier(tier: str) -> str:
    """Normalize a tier name; unknown or empty values fall back to DEFAULT_TIER."""
    if not tier:
        return DEFAULT_TIER
    normalized = tier.lower()
    return normalized if normalized in TIER_CONFIGS else DEFAULT_TIER
# ============================================================================
# LEGACY COMPATIBILITY CONSTANTS (to prevent AttributeError)
# ============================================================================
# Maintain backward compatibility with older modules (e.g., app_gradio.py)
# Snapshot of the default tier's QA rework limit, captured at import time;
# later changes to TIER_CONFIGS do not update it.
INITIAL_MAX_REWORK_CYCLES = get_max_rework_cycles(DEFAULT_TIER)
# Explicitly re-export legacy cost multipliers for older modules
# (aliases of BUDGET_BUFFER_MULTIPLIER / MAX_COST_MULTIPLIER above).
LEGACY_BUDGET_BUFFER_MULTIPLIER = BUDGET_BUFFER_MULTIPLIER
LEGACY_MAX_COST_MULTIPLIER = MAX_COST_MULTIPLIER
# ============================================================================
# EXPORTS
# ============================================================================
# Public API for ``from graph_config import *``. Adding names here is
# backward-compatible; removing or renaming them is not.
__all__ = [
    'TIER_LITE', 'TIER_STANDARD', 'TIER_FULL', 'TIER_CONFIGS', 'DEFAULT_TIER',
    'MAX_EXECUTION_PATH_LENGTH', 'NODE_TIMEOUT_SECONDS',
    'DEFAULT_LANGUAGE', 'SUPPORTED_LANGUAGES', 'NO_CODE_KEYWORDS', 'CODE_KEYWORDS',
    'RESEARCH_KEYWORDS', 'REQUIRE_CITATIONS_FOR_RESEARCH', 'WEB_SEARCH_ENABLED',
    'KNOWN_ARTIFACT_TYPES', 'USER_ARTIFACTS_DIR', 'MAX_CONVERSATION_HISTORY',
    'FOLLOW_UP_KEYWORDS', 'get_tier_config', 'calculate_cost_per_call',
    'is_tier_unlimited', 'get_max_rework_cycles', 'should_include_monitoring',
    'validate_tier', 'INITIAL_MAX_REWORK_CYCLES',
    # Canonical multipliers, previously reachable via * only through their
    # LEGACY_ aliases even though both names exist at module level.
    'BUDGET_BUFFER_MULTIPLIER', 'MAX_COST_MULTIPLIER',
    'LEGACY_BUDGET_BUFFER_MULTIPLIER', 'LEGACY_MAX_COST_MULTIPLIER',
]