# multi_language_support.py
"""
Multi-Language Support Module for AI Lab.

Extends the system to support all major programming languages: language
detection from text or file names, extraction of fenced code blocks, local
and Docker-based execution, and detection of the requested artifact type.
"""

import functools
import json
import logging
import os
import re
import shlex
import subprocess
import tempfile
import uuid
from dataclasses import dataclass
from typing import Dict, List, Optional, Tuple

log = logging.getLogger(__name__)


# Language configurations
@dataclass
class LanguageConfig:
    """Static description of how to recognise and run one language."""

    # Human-readable name used in artifact hints.
    name: str
    # File extensions; the first one is used when writing source files.
    extensions: List[str]
    # Tokens whose presence in a text counts as evidence for the language.
    keywords: List[str]
    # Command template; may use {file}, {executable} and {classname}.
    run_command: str
    # Optional command template run before run_command.
    compile_command: Optional[str] = None
    # Image used by execute_code_docker; None disables the Docker path.
    docker_image: Optional[str] = None
    repl_available: bool = False


LANGUAGES = {
    "python": LanguageConfig(
        name="Python",
        extensions=[".py"],
        keywords=["python", "py", "pip", "import", "def", "pandas", "numpy"],
        run_command="python3 {file}",
        docker_image="python:3.11",
        repl_available=True,
    ),
    "javascript": LanguageConfig(
        name="JavaScript",
        extensions=[".js", ".mjs"],
        keywords=["javascript", "js", "node", "npm", "const", "let", "var",
                  "function", "console.log"],
        run_command="node {file}",
        docker_image="node:20",
        repl_available=True,
    ),
    "typescript": LanguageConfig(
        name="TypeScript",
        extensions=[".ts"],
        keywords=["typescript", "ts", "interface", "type", "enum"],
        run_command="npx ts-node {file}",
        compile_command="tsc {file}",
        docker_image="node:20",
    ),
    "java": LanguageConfig(
        name="Java",
        extensions=[".java"],
        keywords=["java", "class", "public static void main",
                  "System.out.println"],
        run_command="java {classname}",
        compile_command="javac {file}",
        docker_image="openjdk:17",
    ),
    # NOTE(review): "dotnet build" / "dotnet run" look for a project file in
    # the working directory; execute_code only writes a single .cs file, so
    # this entry probably cannot run as configured -- confirm.
    "csharp": LanguageConfig(
        name="C#",
        extensions=[".cs"],
        keywords=["c#", "csharp", "dotnet", "using System", "namespace",
                  "Console.WriteLine"],
        run_command="dotnet run",
        compile_command="dotnet build",
        docker_image="mcr.microsoft.com/dotnet/sdk:7.0",
    ),
    "cpp": LanguageConfig(
        name="C++",
        extensions=[".cpp", ".cc", ".cxx"],
        keywords=["c++", "cpp", "iostream", "std::", "cout", "#include"],
        run_command="./{executable}",
        compile_command="g++ -o {executable} {file}",
        docker_image="gcc:latest",
    ),
    "c": LanguageConfig(
        name="C",
        extensions=[".c"],
        keywords=["c language", "printf", "scanf", "#include "],
        run_command="./{executable}",
        compile_command="gcc -o {executable} {file}",
        docker_image="gcc:latest",
    ),
    "go": LanguageConfig(
        name="Go",
        extensions=[".go"],
        keywords=["golang", "go", "package main", "fmt.Println"],
        run_command="go run {file}",
        docker_image="golang:1.21",
    ),
    # The binary produced by rustc is run directly: "cargo run" needs a
    # Cargo project, which a single temporary source file is not.
    "rust": LanguageConfig(
        name="Rust",
        extensions=[".rs"],
        keywords=["rust", "cargo", "println!", "fn main"],
        run_command="./{executable}",
        compile_command="rustc -o {executable} {file}",
        docker_image="rust:latest",
    ),
    "ruby": LanguageConfig(
        name="Ruby",
        extensions=[".rb"],
        keywords=["ruby", "puts", "def", "end", "gem"],
        run_command="ruby {file}",
        docker_image="ruby:3.2",
        repl_available=True,
    ),
    # NOTE(review): the source text was truncated inside this entry (it looks
    # like an HTML-tag stripper removed everything from "<?php" onward).
    # Only name, extensions and the first keyword are original; the second
    # keyword and run_command are a minimal reconstruction, and any entries
    # that followed (the demo at the bottom expects an "sql" id) are lost.
    # Restore from the original file.
    "php": LanguageConfig(
        name="PHP",
        extensions=[".php"],
        keywords=["php", "<?php"],
        run_command="php {file}",
    ),
}

# Fence-label aliases mapped to LANGUAGES ids.
_FENCE_LANGUAGE_ALIASES = {
    "js": "javascript",
    "ts": "typescript",
    "py": "python",
    "cs": "csharp",
    "c#": "csharp",
    "cpp": "cpp",
    "c++": "cpp",
    "rb": "ruby",
    "rs": "rust",
}

# ```label\ncode\n``` -- the label may contain "+", "#", "." or "-" so that
# fences such as ```c++ and ```c# are recognised.
_FENCED_BLOCK_RE = re.compile(r"```([\w+#.-]+)?\s*\n(.*?)\n```", re.DOTALL)

# Name of the compiled binary, relative to the temporary working directory.
_EXECUTABLE_NAME = "program"


@functools.lru_cache(maxsize=None)
def _keyword_pattern(keyword: str) -> "re.Pattern[str]":
    """Compile a case-folded matcher for one configured keyword.

    A word boundary is required only on the sides where the keyword itself
    starts or ends with a word character, so "go" does not match inside
    "google" while "std::" still matches in "std::cout".
    """
    lowered = keyword.lower()
    prefix = r"(?<!\w)" if re.fullmatch(r"\w", lowered[:1]) else ""
    suffix = r"(?!\w)" if re.fullmatch(r"\w", lowered[-1:]) else ""
    return re.compile(prefix + re.escape(lowered) + suffix)


def _keyword_hits(config: LanguageConfig, text_lower: str) -> int:
    """Count how many of the language's keywords occur in lowercased text."""
    return sum(
        1 for keyword in config.keywords
        if _keyword_pattern(keyword).search(text_lower)
    )


# NOTE(review): the original signature line was lost with the truncated text
# above; it is reconstructed from the body (which reads `text` and
# `filename`), the surviving "Optional[str]:" return annotation, and the
# single-argument calls elsewhere in this file.
def detect_language(text: str, filename: Optional[str] = None) -> Optional[str]:
    """
    Detect programming language from text content or filename.

    Args:
        text: Source code or free-form text; may be empty or None.
        filename: Optional file name; when its extension is known it decides
            the result and the text is not inspected.

    Returns:
        The LANGUAGES id with the most keyword hits (the earliest entry wins
        ties), or None when nothing matches.
    """
    text_lower = text.lower() if text else ""

    # Check filename extension first
    if filename:
        ext = os.path.splitext(filename)[1].lower()
        for lang_id, config in LANGUAGES.items():
            if ext in config.extensions:
                return lang_id

    # Count keyword matches for each language. Keywords are compared
    # case-insensitively: several of them contain capitals and the text has
    # already been lowercased.
    scores = {}
    for lang_id, config in LANGUAGES.items():
        hits = _keyword_hits(config, text_lower)
        if hits > 0:
            scores[lang_id] = hits

    # Return language with highest score
    if not scores:
        return None
    return max(scores, key=scores.get)


def extract_code_blocks_multi_lang(text: str) -> List[Tuple[str, str]]:
    """
    Extract code blocks with their languages from markdown text.

    Args:
        text: Markdown (or plain) text.

    Returns:
        List of (language, code) tuples. A fence label is lowercased and
        mapped through the alias table; unknown labels are passed through
        as-is. Unlabelled blocks use keyword detection and fall back to
        "text". If there are no fenced blocks at all, the whole text is
        returned as one block when a language can be detected.
    """
    blocks = []

    for match in _FENCED_BLOCK_RE.finditer(text):
        lang_hint = (match.group(1) or "").lower()
        code = match.group(2)

        detected_lang = None
        if lang_hint:
            detected_lang = _FENCE_LANGUAGE_ALIASES.get(lang_hint, lang_hint)
        if not detected_lang:
            detected_lang = detect_language(code) or "text"

        blocks.append((detected_lang, code))

    # If no code blocks, try to detect language in plain text
    if not blocks and text.strip():
        lang = detect_language(text)
        if lang:
            blocks.append((lang, text))

    return blocks


def _build_command(template: str, **values: str) -> List[str]:
    """Split a command template into argv, then fill in the placeholders.

    Splitting happens before substitution so that a substituted path
    containing spaces stays a single argument.
    """
    return [token.format(**values) for token in shlex.split(template)]


def _prepare_java_source(code: str) -> Tuple[str, str]:
    """Return (class_name, source) such that the source fits <class_name>.java."""
    class_match = re.search(r"public\s+class\s+(\w+)", code)
    if class_match:
        return class_match.group(1), code
    # Wrap code in a class if needed
    if "public class" not in code:
        code = f"public class Main {{\n{code}\n}}"
    return "Main", code


def execute_code(code: str, language: str, timeout: int = 30) -> Dict:
    """
    Execute code in any supported language on the local machine.

    The code is written to a temporary directory, compiled when the language
    defines a compile command, then run with that directory as the working
    directory.

    Args:
        code: Source code to run.
        language: A LANGUAGES id.
        timeout: Seconds allowed for the compile step and for the run step
            (each step gets the full amount).

    Returns:
        Dict with "language", "stdout", "stderr" and "exit_code". exit_code
        is 1 for an unsupported language and -1 on timeout or when the
        command could not be started; this function does not raise.
    """
    if language not in LANGUAGES:
        return {
            "stdout": "",
            "stderr": f"Unsupported language: {language}",
            "exit_code": 1,
            "language": language,
        }

    config = LANGUAGES[language]
    result = {"language": language, "stdout": "", "stderr": "", "exit_code": 0}

    try:
        with tempfile.TemporaryDirectory() as tmpdir:
            source_name = f"code{config.extensions[0]}"
            class_name = ""
            if language == "java":
                # javac requires a public class to live in <ClassName>.java.
                class_name, code = _prepare_java_source(code)
                source_name = f"{class_name}.java"
            file_path = os.path.join(tmpdir, source_name)

            with open(file_path, "w", encoding="utf-8") as f:
                f.write(code)

            # The executable is named relative to tmpdir (the cwd of both
            # subprocesses) so that "./{executable}" resolves correctly.
            values = {
                "file": file_path,
                "executable": _EXECUTABLE_NAME,
                "classname": class_name,
            }

            # Compile if needed
            if config.compile_command:
                proc = subprocess.run(
                    _build_command(config.compile_command, **values),
                    capture_output=True,
                    text=True,
                    timeout=timeout,
                    cwd=tmpdir,
                )
                if proc.returncode != 0:
                    # Some compilers report diagnostics on stdout.
                    result["stdout"] = proc.stdout
                    result["stderr"] = proc.stderr
                    result["exit_code"] = proc.returncode
                    return result

            # Run the code
            proc = subprocess.run(
                _build_command(config.run_command, **values),
                capture_output=True,
                text=True,
                timeout=timeout,
                cwd=tmpdir,
            )
            result["stdout"] = proc.stdout
            result["stderr"] = proc.stderr
            result["exit_code"] = proc.returncode

    except subprocess.TimeoutExpired:
        result["stderr"] = f"Execution timeout ({timeout}s)"
        result["exit_code"] = -1
    except Exception as e:  # boundary: callers expect a result dict, not a raise
        result["stderr"] = str(e)
        result["exit_code"] = -1

    return result


def execute_code_docker(code: str, language: str, timeout: int = 30) -> Dict:
    """
    Execute code in Docker container for better isolation.

    Requires Docker to be installed and running, plus the `docker` Python
    package. Falls back to execute_code() when the language has no image or
    when anything on the Docker path fails (including run commands that need
    placeholders other than {file}).

    Args:
        code: Source code to run.
        language: A LANGUAGES id.
        timeout: Seconds to wait for the container to finish.

    Returns:
        Dict with "language", "stdout", "stderr" and "exit_code".
    """
    config = LANGUAGES.get(language)
    if config is None or not config.docker_image:
        return execute_code(code, language, timeout)

    container = None
    try:
        import docker

        container_path = f"/tmp/code{config.extensions[0]}"
        run_cmd = config.run_command.format(file=container_path)

        # The source travels in an environment variable and is expanded by
        # the shell inside double quotes, so quotes or shell metacharacters
        # in the code can neither break the command nor be executed.
        # NOTE(review): very large sources may exceed the environment size
        # limit of the container runtime -- confirm if that matters here.
        shell_script = "printf '%s\\n' \"$SOURCE_CODE\" > {path} && {run}".format(
            path=container_path, run=run_cmd
        )

        client = docker.from_env()
        container = client.containers.run(
            config.docker_image,
            command=["sh", "-c", shell_script],
            environment={"SOURCE_CODE": code},
            detach=True,
            remove=False,
        )

        # Wait for completion
        status = container.wait(timeout=timeout)
        stdout = container.logs(stdout=True, stderr=False)
        stderr = container.logs(stdout=False, stderr=True)

        return {
            "language": language,
            "stdout": stdout.decode("utf-8", errors="replace"),
            "stderr": stderr.decode("utf-8", errors="replace"),
            "exit_code": status["StatusCode"],
        }

    except Exception as e:  # boundary: any Docker problem means "run locally"
        log.warning("Docker execution failed, falling back to local: %s", e)
        return execute_code(code, language, timeout)
    finally:
        # Always clean up, including after a timeout while waiting.
        if container is not None:
            try:
                container.remove(force=True)
            except Exception as cleanup_error:
                log.debug("Could not remove container: %s", cleanup_error)


# Enhanced artifact type detection
def detect_requested_output_types_enhanced(text: str) -> Dict:
    """
    Enhanced detection that recognizes multiple programming languages.

    Checks, in order: notebooks, spreadsheets, Word documents, PDFs,
    repositories, language-specific scripts, generic script requests.

    Args:
        text: The user's request; may be empty or None.

    Returns:
        Dict with "requires_artifact", "artifact_type", "artifact_hint" and
        "language" (a LANGUAGES id or None).
    """
    if not text:
        return {"requires_artifact": False, "artifact_type": None,
                "artifact_hint": None, "language": None}

    t = text.lower()

    # Notebooks
    if any(k in t for k in ["jupyter notebook", "jupyter", "notebook", "ipynb"]):
        return {"requires_artifact": True, "artifact_type": "notebook",
                "artifact_hint": "jupyter notebook", "language": "python"}

    # Excel/CSV
    if any(k in t for k in ["excel", ".xlsx", "spreadsheet", "csv"]):
        return {"requires_artifact": True, "artifact_type": "excel",
                "artifact_hint": "Excel file", "language": None}

    # Documents
    if any(k in t for k in ["word document", ".docx", "docx"]):
        return {"requires_artifact": True, "artifact_type": "word",
                "artifact_hint": "Word document", "language": None}

    if any(k in t for k in ["pdf", "pdf file"]):
        return {"requires_artifact": True, "artifact_type": "pdf",
                "artifact_hint": "PDF document", "language": None}

    # Repository
    if any(k in t for k in ["repo", "repository", "app repo", "backend",
                            "codebase", "project"]):
        return {"requires_artifact": True, "artifact_type": "repo",
                "artifact_hint": "application repository",
                "language": detect_language(text)}

    # Scripts - detect specific language. The best-scoring language is used
    # (not the first one with any hit) so this agrees with detect_language.
    lang = detect_language(text)
    if lang:
        return {
            "requires_artifact": True,
            "artifact_type": "script",
            "artifact_hint": f"{LANGUAGES[lang].name} script",
            "language": lang,
        }

    # Generic script request
    if any(k in t for k in ["script", "program", "code", "function",
                            "convert", "translate"]):
        return {
            "requires_artifact": True,
            "artifact_type": "script",
            "artifact_hint": "Code script",
            "language": None,
        }

    return {"requires_artifact": False, "artifact_type": None,
            "artifact_hint": None, "language": None}


# Update the write_script function
def write_script_multi_lang(code_text: str, language: Optional[str] = None,
                            out_dir: Optional[str] = None) -> str:
    """
    Write script file with appropriate extension based on language.

    Args:
        code_text: Content to write.
        language: A LANGUAGES id; when missing or unknown the language is
            detected from code_text, and ".txt" is used if that fails.
        out_dir: Target directory (created if needed); defaults to the
            system temporary directory.

    Returns:
        Path of the file that was written.
    """
    out_dir = out_dir or tempfile.gettempdir()
    os.makedirs(out_dir, exist_ok=True)

    if language not in LANGUAGES:
        # Try to detect from code
        language = detect_language(code_text)
    ext = LANGUAGES[language].extensions[0] if language else ".txt"

    uid = uuid.uuid4().hex[:10]
    filename = os.path.join(out_dir, f"generated_script_{uid}{ext}")

    with open(filename, "w", encoding="utf-8") as f:
        f.write(code_text)

    return filename


# Integration function for existing graph.py
def apply_multi_language_support():
    """
    Monkey-patch existing functions to support multiple languages.

    Replaces `detect_requested_output_types` and `write_script` on the
    `graph` module and installs `graph.execute_code`, which routes Python to
    the module's existing `execute_python_code` (when present) and everything
    else to execute_code().

    Returns:
        True once the patches are applied.
    """
    import graph as graph_module

    # Replace functions
    graph_module.detect_requested_output_types = detect_requested_output_types_enhanced
    graph_module.write_script = write_script_multi_lang

    # Add new execute function that handles all languages
    original_execute = getattr(graph_module, "execute_python_code", None)

    def execute_any_code(code: str, language: Optional[str] = None) -> Dict:
        if not language:
            language = detect_language(code) or "python"

        if language == "python" and original_execute:
            # Use existing Python executor for backwards compatibility
            return original_execute(code)
        return execute_code(code, language)

    graph_module.execute_code = execute_any_code

    log.info("Multi-language support enabled for %d languages", len(LANGUAGES))
    return True


# Sample usage and tests
if __name__ == "__main__":
    # Test language detection
    test_texts = [
        ("print('Hello World')", "python"),
        ("console.log('Hello World')", "javascript"),
        ("System.out.println('Hello World');", "java"),
        ("cout << 'Hello World' << endl;", "cpp"),
        ("fmt.Println('Hello World')", "go"),
        ("puts 'Hello World'", "ruby"),
        ("SELECT * FROM users WHERE age > 18", "sql"),
    ]

    print("Language Detection Tests:")
    for text, expected in test_texts:
        detected = detect_language(text)
        print(f" {expected}: {'✓' if detected == expected else 'X'} (detected: {detected})")

    # Test code execution
    print("\nCode Execution Tests:")
    samples = [
        ("python", "print('Hello from Python')"),
        ("javascript", "console.log('Hello from JavaScript')"),
        ("ruby", "puts 'Hello from Ruby'"),
        ("go", "package main\nimport \"fmt\"\nfunc main() { fmt.Println(\"Hello from Go\") }"),
    ]

    for lang, code in samples:
        result = execute_code(code, lang)
        status = "✓" if result["exit_code"] == 0 else "X"
        print(f" {lang}: {status} - {result.get('stdout', '').strip()}")