# Spaces: JatinAutonomousLabs / SPOC_V1 (Paused)
# File size: 12,011 Bytes

"""
graph_config.py - Centralized Graph Configuration
==================================================

All configurable parameters for the graph orchestration system.
Modify these values to change graph behavior.

Author: AI Lab Team
Last Updated: 2025-10-10
Version: 3.1 - Added legacy compatibility constants
"""

import os
from typing import Any, Dict, List, Optional

# ============================================================================
# TIER SYSTEM CONFIGURATION
# ============================================================================

# Tier identifiers; these strings are the keys of TIER_CONFIGS below.
TIER_LITE = "lite"
TIER_STANDARD = "standard"
TIER_FULL = "full"

# Per-tier settings. Every entry carries the same keys, in the same order:
# name, description, max_cost (None means no cap), max_execution_time (seconds),
# qa_rework_cycles, the three includes_* feature flags, and use_cases.
TIER_CONFIGS = {
    TIER_LITE: dict(
        name="Lite",
        description="Fast, basic responses for simple queries",
        max_cost=0.50,
        max_execution_time=60,  # seconds
        qa_rework_cycles=1,
        includes_planning=False,
        includes_experiments=False,
        includes_monitoring=False,
        use_cases=["Quick questions", "Simple summaries", "Basic info"],
    ),
    TIER_STANDARD: dict(
        name="Standard",
        description="Balanced quality and speed for most tasks",
        max_cost=5.00,
        max_execution_time=180,  # seconds
        qa_rework_cycles=2,
        includes_planning=True,
        includes_experiments=True,
        includes_monitoring=False,
        use_cases=["Code generation", "Research", "Document creation"],
    ),
    TIER_FULL: dict(
        name="Full",
        description="Premium quality with comprehensive refinement",
        max_cost=None,  # Unlimited
        max_execution_time=900,  # seconds
        qa_rework_cycles=10,
        includes_planning=True,
        includes_experiments=True,
        includes_monitoring=True,
        use_cases=["Complex projects", "Production code", "Detailed research"],
    ),
}

# Default tier
DEFAULT_TIER = TIER_STANDARD


# ============================================================================
# EXECUTION CONTROL
# ============================================================================

# Maximum execution path length (safety limit).
# NOTE(review): presumably counted in visited graph nodes -- confirm against the runner.
MAX_EXECUTION_PATH_LENGTH = 150

# Node timeout settings: time budget for a single node, in seconds.
# NOTE(review): ALLOW_TIMEOUT_RECOVERY presumably lets execution continue after a
# node times out -- its consumer is not in this module, confirm there.
NODE_TIMEOUT_SECONDS = 60
ALLOW_TIMEOUT_RECOVERY = True

# Cost estimation: prices per 1,000 tokens (currency is not stated in this file;
# presumably USD -- confirm). calculate_cost_per_call() averages the input and
# output rates and falls back to AVG_TOKENS_PER_CALL when no token count is given.
GPT4O_INPUT_COST_PER_1K_TOKENS = 0.005
GPT4O_OUTPUT_COST_PER_1K_TOKENS = 0.015
AVG_TOKENS_PER_CALL = 2000

# Budget multipliers (legacy compatibility); both are re-exported near the end of
# the module under LEGACY_* names.
BUDGET_BUFFER_MULTIPLIER = 1.20
MAX_COST_MULTIPLIER = 1.20


# ============================================================================
# LANGUAGE & CODING CONFIGURATION
# ============================================================================

# Fallback programming language.
DEFAULT_LANGUAGE = "python"

# Languages the system accepts as a code-generation target.
SUPPORTED_LANGUAGES = [
    "python", "javascript", "typescript", "java",
    "go", "rust", "cpp", "csharp", "php", "ruby"
]

# Phrases that signal a preference for each supported language.
# Every entry of SUPPORTED_LANGUAGES has a key here; cpp, csharp, php and ruby
# were previously missing, so a preference for them could never be detected.
# The new entries are appended after the original ones so the relative order of
# the pre-existing keys is unchanged.
# NOTE(review): short tokens such as "go", "js", "ts", "py" and "java" can match
# inside longer words if the consumer does substring matching -- confirm that the
# matcher works on word boundaries.
LANGUAGE_PREFERENCE_KEYWORDS = {
    "python": ["python", "py", "prefer python", "use python"],
    "javascript": ["javascript", "js", "node", "prefer javascript"],
    "typescript": ["typescript", "ts", "prefer typescript"],
    "java": ["java", "prefer java"],
    "go": ["golang", "go", "prefer go"],
    "rust": ["rust", "prefer rust"],
    "cpp": ["c++", "cpp", "prefer c++"],
    "csharp": ["c#", "csharp", "prefer c#"],
    "php": ["php", "prefer php"],
    "ruby": ["ruby", "prefer ruby"],
}

# Phrases associated with requests that do not need code.
NO_CODE_KEYWORDS = [
    'what is', 'what are', 'document', 'curriculum', 'explain', 'describe',
    'summarize', 'list', 'tell me about', 'report',
    'definition of', 'meaning of', 'overview of'
]

# Phrases associated with requests that do need code.
CODE_KEYWORDS = [
    'script', 'function', 'api', 'backend', 'frontend',
    'algorithm', 'program', 'code', 'implement',
    'build', 'create app', 'develop', 'software'
]


# ============================================================================
# RESEARCH MODE CONFIGURATION
# ============================================================================

# Keyword list for research-style requests.
RESEARCH_KEYWORDS = [
    # verbs
    "research", "analyze", "study", "investigate", "explore",
    "survey", "review", "compare", "evaluate", "assess",
    # topic phrases
    "trends", "best practices", "state of", "landscape",
    # depth qualifiers
    "comprehensive", "detailed analysis", "in-depth",
]

# Keyword list for report-style deliverables.
REPORT_KEYWORDS = [
    "report", "summary", "findings", "document",
    "write up", "present", "deliverable", "brief", "whitepaper",
]

# Citation policy for research output.
REQUIRE_CITATIONS_FOR_RESEARCH = True
MIN_SOURCES_PER_RESEARCH = 5
MAX_SOURCES_PER_RESEARCH = 25
CITATION_FORMAT = "APA"

# Web search settings.
WEB_SEARCH_ENABLED = True
WEB_SEARCH_API = "brave"
WEB_SEARCH_MAX_RESULTS = 25
WEB_SEARCH_TIMEOUT = 30  # seconds


# ============================================================================
# ARTIFACT TYPE CONFIGURATION
# ============================================================================

# Artifact type names recognized by the system.
KNOWN_ARTIFACT_TYPES = {
    "notebook",
    "script",
    "repo",
    "word",
    "excel",
    "pdf",
    "image",
    "data",
}

# Keywords associated with each artifact type.
# Only six of the known types have keyword lists; "image" and "data" have none.
ARTIFACT_TYPE_KEYWORDS = {
    "notebook": ["notebook", "jupyter", "ipynb", "visualization", "chart", "graph"],
    "script": ["script", "function", "utility", "tool"],
    "repo": ["app", "backend", "frontend", "api", "repository", "project"],
    "word": ["document", "report", "research", "analysis", "doc", "docx"],
    "excel": ["spreadsheet", "excel", "xlsx", "csv", "data table"],
    "pdf": ["pdf", "printable"],
}

# Default artifact types for code and research output.
DEFAULT_CODE_ARTIFACT = "script"
DEFAULT_RESEARCH_ARTIFACT = "word"


# ============================================================================
# FILE & ARTIFACT MANAGEMENT
# ============================================================================

# Base output directory; can be overridden through the OUT_DIR environment
# variable and defaults to "outputs".
OUT_DIR = os.environ.get("OUT_DIR", "outputs")
# NOTE(review): of the paths below only EXPORTS_DIR is derived from OUT_DIR; the
# others are hard-coded under "outputs" and will not follow an OUT_DIR override.
# Confirm whether that split is intentional before relying on the override.
OUTPUTS_DIR = "outputs"
USER_ARTIFACTS_DIR = "outputs/user_artifacts"
EXPORTS_DIR = os.path.join(OUT_DIR, "exports")
FEEDBACK_STORAGE_DIR = "outputs/feedback"

# Artifact registry location and artifact policy values (days / megabytes, per
# the constant names).
ARTIFACT_REGISTRY_FILE = "outputs/artifact_registry.json"
ENSURE_ALL_ARTIFACTS_EXPORTED = True
ARTIFACT_RETENTION_DAYS = 30
MAX_ARTIFACT_SIZE_MB = 100

# File-naming pieces. TIMESTAMP_FORMAT is a strftime-style pattern
# (YYYYMMDD_HHMMSS).
# NOTE(review): UUID_LENGTH is presumably the number of UUID characters kept in
# generated filenames -- confirm against the code that builds them.
UUID_LENGTH = 8
FILENAME_SEPARATOR = "_"
TIMESTAMP_FORMAT = "%Y%m%d_%H%M%S"


# ============================================================================
# CONVERSATION CONTEXT MANAGEMENT
# ============================================================================

# Limits and switches for carrying conversation context between requests.
# NOTE(review): MAX_CONVERSATION_HISTORY and SUMMARIZE_AFTER_EXCHANGES are
# presumably counted in user/assistant exchanges -- confirm against the consumer.
MAX_CONVERSATION_HISTORY = 25
CONTEXT_TOKEN_LIMIT = 40000
INCLUDE_ARTIFACTS_IN_CONTEXT = True
AUTO_SUMMARIZE_LONG_CONVERSATIONS = True
SUMMARIZE_AFTER_EXCHANGES = 5

# Words and phrases associated with follow-up requests.
FOLLOW_UP_KEYWORDS = [
    'also', 'additionally', 'now', 'then', 'next',
    'can you', 'please', 'what about', 'how about',
    'add', 'include', 'expand', 'modify', 'change',
    'update', 'improve', 'enhance', 'refine'
]

# Pronouns that may refer back to earlier content in the conversation.
REFERENCE_PRONOUNS = ['it', 'that', 'this', 'they', 'them', 'those', 'these']

# User-facing context indicator strings (Markdown with emoji).
# CONTEXT_INDICATOR_FORMAT has a {count} placeholder to be filled via str.format.
SHOW_CONTEXT_INDICATOR = True
CONTEXT_INDICATOR_NEW = "📊 **New conversation**"
CONTEXT_INDICATOR_FOLLOW_UP = "🔄 Follow-up detected"
CONTEXT_INDICATOR_FORMAT = "💬 **Context: {count} exchange(s)**"


# ============================================================================
# EXECUTION MODES
# ============================================================================

# Execution modes, built from a compact table. Each row is
# (mode, description, needs_code, requires_citations, uses_web_search, output_formats)
# and becomes one entry whose keys appear in that same column order.
EXECUTION_MODES = {
    mode: {
        "description": description,
        "needs_code": needs_code,
        "requires_citations": requires_citations,
        "uses_web_search": uses_web_search,
        "output_formats": output_formats,
    }
    for mode, description, needs_code, requires_citations, uses_web_search, output_formats in (
        ("research", "Factual research with citations", False, True, True, ["document", "response"]),
        ("coding", "Code generation and implementation", True, False, False, ["script", "notebook", "repo"]),
        ("hybrid", "Research with code examples", True, True, True, ["document", "notebook"]),
        ("simple_response", "Quick text response", False, False, False, ["text"]),
    )
}


# ============================================================================
# NODE STATUS MESSAGES
# ============================================================================

# Progress message for each graph node, keyed by node name.
NODE_STATUS = dict(
    memory="Retrieving context...",
    intent="Clarifying objective...",
    pm="Planning execution...",
    pragmatist="Assessing feasibility...",
    experimenter="Generating artifacts...",
    research="Conducting research...",
    synthesis="Synthesizing response...",
    qa="Reviewing quality...",
    observer="Monitoring performance...",
    archive="Saving to memory...",
)

# Final status lines: success, success with limitations, failure.
COMPLETION_SUCCESS = "✅ Task completed successfully"
COMPLETION_WITH_WARNING = "⚠️ Task completed with limitations"
COMPLETION_FAILED = "❌ Task could not be completed"


# ============================================================================
# ERROR HANDLING
# ============================================================================

# Error message strings, one per failure category.
ERROR_NO_LLM = "LLM not available"
ERROR_TIMEOUT = "Operation timed out"
ERROR_BUDGET_EXCEEDED = "Budget limit exceeded"
ERROR_PARSE_FAILED = "Failed to parse LLM response"
ERROR_ARTIFACT_CREATION = "Failed to create artifact"

# Retry policy: attempt count, base delay in seconds, and whether to back off
# exponentially.
# NOTE(review): the retry loop is not in this module; whether MAX_RETRIES counts
# the first attempt and how the backoff grows must be confirmed there.
MAX_RETRIES = 3
RETRY_DELAY_SECONDS = 2
EXPONENTIAL_BACKOFF = True


# ============================================================================
# LOGGING CONFIGURATION
# ============================================================================

# Log level name per tier. The values are plain strings that match the level
# names of the standard `logging` module.
# NOTE(review): how they are turned into numeric levels is not shown here.
TIER_LOG_LEVELS = {
    TIER_LITE: "WARNING",
    TIER_STANDARD: "INFO",
    TIER_FULL: "DEBUG"
}

# Separator lines for log output: 60 "=" characters and 40 "-" characters.
LOG_SEPARATOR = "=" * 60
LOG_SEPARATOR_SHORT = "-" * 40
# Switches for the optional log categories named by each constant.
LOG_PERFORMANCE_METRICS = True
LOG_TOKEN_USAGE = True
LOG_COST_TRACKING = True


# ============================================================================
# HELPER FUNCTIONS
# ============================================================================

def get_tier_config(tier: str) -> Dict[str, Any]:
    """Look up the settings for ``tier``, falling back to the default tier.

    Args:
        tier: Tier identifier, expected to be a key of ``TIER_CONFIGS``.

    Returns:
        The entry stored in ``TIER_CONFIGS`` for ``tier`` (the stored dict
        itself, not a copy), or the ``DEFAULT_TIER`` entry when ``tier`` is
        not a known key.
    """
    fallback = TIER_CONFIGS[DEFAULT_TIER]
    if tier in TIER_CONFIGS:
        return TIER_CONFIGS[tier]
    return fallback


def calculate_cost_per_call(tokens: Optional[int] = None) -> float:
    """Estimate the cost of one LLM call from its token count.

    The estimate uses the mean of the GPT-4o input and output prices per 1,000
    tokens, i.e. it treats the call as an even split between input and output.

    Args:
        tokens: Total tokens for the call. ``None`` (the default) means "not
            known" and falls back to ``AVG_TOKENS_PER_CALL``. An explicit ``0``
            is honoured and yields a cost of ``0.0``.

    Returns:
        Estimated cost, in the same currency unit as the per-1K price constants.
    """
    # Compare against None rather than relying on truthiness, so that a real
    # zero-token count is not silently replaced by the average.
    if tokens is None:
        tokens = AVG_TOKENS_PER_CALL
    avg_cost_per_1k = (GPT4O_INPUT_COST_PER_1K_TOKENS + GPT4O_OUTPUT_COST_PER_1K_TOKENS) / 2.0
    return (tokens / 1000.0) * avg_cost_per_1k


def is_tier_unlimited(tier: str) -> bool:
    """Return True when ``tier`` has no cost cap.

    A tier is uncapped when its ``max_cost`` setting is ``None``. Unknown tiers
    are resolved by ``get_tier_config``.
    """
    max_cost = get_tier_config(tier)["max_cost"]
    return max_cost is None


def get_max_rework_cycles(tier: str) -> int:
    """Return the ``qa_rework_cycles`` setting of ``tier``.

    Unknown tiers are resolved by ``get_tier_config``.
    """
    return get_tier_config(tier)["qa_rework_cycles"]


def should_include_monitoring(tier: str) -> bool:
    """Return the ``includes_monitoring`` flag of ``tier``.

    Unknown tiers are resolved by ``get_tier_config``.
    """
    return get_tier_config(tier)["includes_monitoring"]


def validate_tier(tier: str) -> str:
    """Normalize a tier name to a key of ``TIER_CONFIGS``.

    Args:
        tier: Tier name as supplied by the caller. Matching ignores case and
            surrounding whitespace.

    Returns:
        The normalized tier key when it is known, otherwise ``DEFAULT_TIER``.
        ``None``, the empty string and non-string values also yield
        ``DEFAULT_TIER`` instead of raising.
    """
    # Non-strings (None, numbers, ...) have no usable .lower(); treat them as
    # "no tier given" rather than failing with AttributeError.
    if not isinstance(tier, str):
        return DEFAULT_TIER
    normalized = tier.strip().lower()
    return normalized if normalized in TIER_CONFIGS else DEFAULT_TIER


# ============================================================================
# LEGACY COMPATIBILITY CONSTANTS (to prevent AttributeError)
# ============================================================================

# Maintain backward compatibility with older modules (e.g., app_gradio.py).
# Evaluated once at import time from the default tier's qa_rework_cycles; it does
# not track later changes to TIER_CONFIGS or DEFAULT_TIER.
INITIAL_MAX_REWORK_CYCLES = get_max_rework_cycles(DEFAULT_TIER)
# Explicitly re-export legacy cost multipliers for older modules.
# These are plain aliases of the current values, bound at import time.
LEGACY_BUDGET_BUFFER_MULTIPLIER = BUDGET_BUFFER_MULTIPLIER
LEGACY_MAX_COST_MULTIPLIER = MAX_COST_MULTIPLIER


# ============================================================================
# EXPORTS
# ============================================================================

# Names exported by ``from graph_config import *``.
# This is a subset of the module: constants not listed here remain reachable by
# attribute access or explicit import.
__all__ = [
    # tiers
    "TIER_LITE",
    "TIER_STANDARD",
    "TIER_FULL",
    "TIER_CONFIGS",
    "DEFAULT_TIER",
    # execution control
    "MAX_EXECUTION_PATH_LENGTH",
    "NODE_TIMEOUT_SECONDS",
    # language and coding
    "DEFAULT_LANGUAGE",
    "SUPPORTED_LANGUAGES",
    "NO_CODE_KEYWORDS",
    "CODE_KEYWORDS",
    # research
    "RESEARCH_KEYWORDS",
    "REQUIRE_CITATIONS_FOR_RESEARCH",
    "WEB_SEARCH_ENABLED",
    # artifacts and conversation context
    "KNOWN_ARTIFACT_TYPES",
    "USER_ARTIFACTS_DIR",
    "MAX_CONVERSATION_HISTORY",
    "FOLLOW_UP_KEYWORDS",
    # helper functions
    "get_tier_config",
    "calculate_cost_per_call",
    "is_tier_unlimited",
    "get_max_rework_cycles",
    "should_include_monitoring",
    "validate_tier",
    # legacy compatibility
    "INITIAL_MAX_REWORK_CYCLES",
    "LEGACY_BUDGET_BUFFER_MULTIPLIER",
    "LEGACY_MAX_COST_MULTIPLIER",
]