Spaces:
Paused
Paused
| # multi_language_support.py | |
| """ | |
| Multi-Language Support Module for AI Lab | |
| Extends the system to support all major programming languages | |
| """ | |
| import os | |
| import re | |
| import json | |
| import subprocess | |
| import tempfile | |
| import uuid | |
| from typing import Dict, Optional, Tuple, List | |
| from dataclasses import dataclass | |
| import logging | |
| log = logging.getLogger(__name__) | |
| # Language configurations | |
@dataclass
class LanguageConfig:
    """Static description of how one programming language is detected and run.

    The class must be a dataclass: the ``LANGUAGES`` registry builds instances
    with keyword arguments, which requires the generated ``__init__``.

    ``run_command`` and ``compile_command`` are ``str.format`` templates; the
    placeholders appearing in this module's configurations are ``{file}``,
    ``{executable}``, ``{classname}`` and ``{jar}``.
    """

    # Human-readable display name, e.g. "Python".
    name: str
    # File extensions including the leading dot; the first entry is the one
    # used when source files are written to disk.
    extensions: List[str]
    # Text fragments whose presence hints at this language.
    keywords: List[str]
    # Template for the command that runs the program.
    run_command: str
    # Template for an optional build step executed before ``run_command``.
    compile_command: Optional[str] = None
    # Container image used for Docker-based execution, if any.
    docker_image: Optional[str] = None
    # Per-language flag; not read by the code visible in this module.
    repl_available: bool = False
# Registry of supported languages, keyed by the language id used throughout
# this module.
#
# Command templates are split on whitespace and executed WITHOUT a shell, with
# the per-run temporary directory as the working directory (see execute_code).
# Consequently:
#   * shell syntax such as "<" redirection does not work in a template;
#   * relative paths (e.g. "./program") resolve inside that temp directory.
LANGUAGES = {
    "python": LanguageConfig(
        name="Python",
        extensions=[".py"],
        keywords=["python", "py", "pip", "import", "def", "pandas", "numpy"],
        run_command="python3 {file}",
        docker_image="python:3.11",
        repl_available=True,
    ),
    "javascript": LanguageConfig(
        name="JavaScript",
        extensions=[".js", ".mjs"],
        keywords=["javascript", "js", "node", "npm", "const", "let", "var", "function", "console.log"],
        run_command="node {file}",
        docker_image="node:20",
        repl_available=True,
    ),
    "typescript": LanguageConfig(
        name="TypeScript",
        extensions=[".ts"],
        keywords=["typescript", "ts", "interface", "type", "enum"],
        run_command="npx ts-node {file}",
        compile_command="tsc {file}",
        docker_image="node:20",
    ),
    "java": LanguageConfig(
        name="Java",
        extensions=[".java"],
        keywords=["java", "class", "public static void main", "System.out.println"],
        run_command="java {classname}",
        compile_command="javac {file}",
        docker_image="openjdk:17",
    ),
    "csharp": LanguageConfig(
        name="C#",
        extensions=[".cs"],
        keywords=["c#", "csharp", "dotnet", "using System", "namespace", "Console.WriteLine"],
        # NOTE(review): "dotnet build"/"dotnet run" expect a project file in
        # the working directory; a lone .cs file is presumably not enough.
        run_command="dotnet run",
        compile_command="dotnet build",
        docker_image="mcr.microsoft.com/dotnet/sdk:7.0",
    ),
    "cpp": LanguageConfig(
        name="C++",
        extensions=[".cpp", ".cc", ".cxx"],
        keywords=["c++", "cpp", "iostream", "std::", "cout", "#include"],
        run_command="./{executable}",
        compile_command="g++ -o {executable} {file}",
        docker_image="gcc:latest",
    ),
    "c": LanguageConfig(
        name="C",
        extensions=[".c"],
        keywords=["c language", "printf", "scanf", "#include <stdio.h>"],
        run_command="./{executable}",
        compile_command="gcc -o {executable} {file}",
        docker_image="gcc:latest",
    ),
    "go": LanguageConfig(
        name="Go",
        extensions=[".go"],
        keywords=["golang", "go", "package main", "fmt.Println"],
        run_command="go run {file}",
        docker_image="golang:1.21",
    ),
    "rust": LanguageConfig(
        name="Rust",
        extensions=[".rs"],
        keywords=["rust", "cargo", "println!", "fn main"],
        # A single source file has no Cargo project, so "cargo run" can never
        # succeed after a bare rustc build. Build to a fixed binary name in
        # the working directory and run that binary instead.
        run_command="./program",
        compile_command="rustc -o program {file}",
        docker_image="rust:latest",
    ),
    "ruby": LanguageConfig(
        name="Ruby",
        extensions=[".rb"],
        keywords=["ruby", "puts", "def", "end", "gem"],
        run_command="ruby {file}",
        docker_image="ruby:3.2",
        repl_available=True,
    ),
    "php": LanguageConfig(
        name="PHP",
        extensions=[".php"],
        keywords=["php", "<?php", "echo", "function", "$"],
        run_command="php {file}",
        docker_image="php:8.2",
        repl_available=True,
    ),
    "r": LanguageConfig(
        name="R",
        extensions=[".r", ".R"],
        keywords=["r language", "rlang", "ddply", "ggplot", "data.frame", "<-"],
        run_command="Rscript {file}",
        docker_image="r-base:latest",
        repl_available=True,
    ),
    "julia": LanguageConfig(
        name="Julia",
        extensions=[".jl"],
        keywords=["julia", "println", "function", "end"],
        run_command="julia {file}",
        docker_image="julia:latest",
        repl_available=True,
    ),
    "swift": LanguageConfig(
        name="Swift",
        extensions=[".swift"],
        keywords=["swift", "print", "func", "var", "let"],
        run_command="swift {file}",
        docker_image="swift:latest",
    ),
    "kotlin": LanguageConfig(
        name="Kotlin",
        extensions=[".kt"],
        keywords=["kotlin", "fun", "val", "var", "println"],
        # The build produces a self-contained jar (-include-runtime); run that
        # jar rather than the .kt source. A fixed relative jar name is used
        # because only {file} is supplied for this language's templates.
        run_command="java -jar program.jar",
        compile_command="kotlinc {file} -include-runtime -d program.jar",
        docker_image="zenika/kotlin",
    ),
    "scala": LanguageConfig(
        name="Scala",
        extensions=[".scala"],
        keywords=["scala", "def", "val", "var", "println", "object"],
        run_command="scala {file}",
        docker_image="hseeberger/scala-sbt:latest",
    ),
    "perl": LanguageConfig(
        name="Perl",
        extensions=[".pl"],
        keywords=["perl", "print", "my", "sub", "use"],
        run_command="perl {file}",
        docker_image="perl:latest",
        repl_available=True,
    ),
    "lua": LanguageConfig(
        name="Lua",
        extensions=[".lua"],
        keywords=["lua", "print", "function", "local", "end"],
        run_command="lua {file}",
        docker_image="nickblah/lua:latest",
    ),
    "haskell": LanguageConfig(
        name="Haskell",
        extensions=[".hs"],
        keywords=["haskell", "main", "putStrLn", "import"],
        run_command="runhaskell {file}",
        docker_image="haskell:latest",
    ),
    "matlab": LanguageConfig(
        name="MATLAB",
        extensions=[".m"],
        keywords=["matlab", "disp", "function", "end", "plot"],
        run_command="octave {file}",  # Using Octave as MATLAB alternative
        docker_image="gnuoctave/octave:latest",
    ),
    "sql": LanguageConfig(
        name="SQL",
        extensions=[".sql"],
        keywords=["sql", "select", "from", "where", "insert", "update", "create table"],
        # "sqlite3 < {file}" needs a shell for the redirection; without one the
        # "<" is passed to sqlite3 as a literal argument. Feed the script via
        # -init against an in-memory database, stop at the first error
        # (-bail) and quit once the script has been processed.
        run_command="sqlite3 -bail -init {file} :memory: .quit",
        # NOTE(review): this image is PostgreSQL while run_command uses the
        # sqlite3 CLI - confirm the image actually ships sqlite3.
        docker_image="postgres:latest",
    ),
    "bash": LanguageConfig(
        name="Bash",
        extensions=[".sh", ".bash"],
        keywords=["bash", "shell", "script", "echo", "#!/bin/bash"],
        run_command="bash {file}",
        docker_image="bash:latest",
        repl_available=True,
    ),
    "powershell": LanguageConfig(
        name="PowerShell",
        extensions=[".ps1"],
        keywords=["powershell", "Write-Host", "function", "param"],
        run_command="pwsh {file}",
        docker_image="mcr.microsoft.com/powershell:latest",
    ),
}
def _keyword_in_text(keyword: str, text_lower: str) -> bool:
    """Return True if *keyword* occurs in the already lower-cased *text_lower*.

    The keyword is lower-cased before matching so that mixed-case entries such
    as "System.out.println" can match at all. Where the keyword starts or ends
    with a word character, that edge must sit on a token boundary, so "go"
    does not match inside "google" and "end" does not match inside "endl".
    Edges made of punctuation ("std::", "<-", "$") are matched as-is.
    """
    needle = keyword.lower()
    if not needle:
        return False
    leading = r"(?<!\w)" if (needle[0].isalnum() or needle[0] == "_") else ""
    trailing = r"(?!\w)" if (needle[-1].isalnum() or needle[-1] == "_") else ""
    return re.search(leading + re.escape(needle) + trailing, text_lower) is not None


def detect_language(text: str, filename: Optional[str] = None) -> Optional[str]:
    """
    Detect programming language from text content or filename.

    Args:
        text: Source code or a natural-language request; may be empty or None.
        filename: Optional file name. When its extension is registered for a
            language, that language is returned without looking at *text*.

    Returns:
        The id (key of ``LANGUAGES``) of the language with the most keyword
        hits, or None when nothing matches. On a tie the language listed
        first in ``LANGUAGES`` wins.
    """
    # Check filename extension first; compare case-insensitively on both
    # sides so registered extensions like ".R" behave the same as ".r".
    if filename:
        ext = os.path.splitext(filename)[1].lower()
        if ext:
            for lang_id, config in LANGUAGES.items():
                if any(ext == known.lower() for known in config.extensions):
                    return lang_id

    text_lower = text.lower() if text else ""
    if not text_lower:
        return None

    # Count keyword matches for each language.
    scores = {}
    for lang_id, config in LANGUAGES.items():
        score = sum(
            1 for keyword in config.keywords
            if _keyword_in_text(keyword, text_lower)
        )
        if score > 0:
            scores[lang_id] = score

    # max() returns the first maximal key, i.e. registry order breaks ties.
    if scores:
        return max(scores, key=scores.get)
    return None
# Fence labels that differ from the language ids used by this module.
# Labels not listed here are passed through lower-cased.
_FENCE_LANGUAGE_ALIASES = {
    "js": "javascript",
    "ts": "typescript",
    "py": "python",
    "python3": "python",
    "cs": "csharp",
    "c#": "csharp",
    "c++": "cpp",
    "cxx": "cpp",
    "rb": "ruby",
    "rs": "rust",
    "golang": "go",
    "sh": "bash",
    "shell": "bash",
    "kt": "kotlin",
    "ps1": "powershell",
    "pwsh": "powershell",
}

# Matches ```label\ncode\n```. The label may contain "+", "#", "." and "-"
# so that fences such as ```c++ and ```c# are recognised; a plain \w+ would
# reject them and the whole block would be lost.
_FENCED_BLOCK_RE = re.compile(r"```([\w+#.\-]+)?\s*\n(.*?)\n```", re.DOTALL)


def extract_code_blocks_multi_lang(text: str) -> List[Tuple[str, str]]:
    """
    Extract code blocks with their languages from markdown text.

    Args:
        text: Markdown (or plain) text; empty or None yields an empty list.

    Returns:
        List of (language, code) tuples in document order. For a labelled
        fence the language is the lower-cased label with common aliases
        mapped to this module's language ids. For an unlabelled fence it is
        whatever ``detect_language`` reports for the code, or "text". If the
        input contains no fenced block at all, the whole text is returned as
        a single block when a language can be detected for it.
    """
    if not text:
        return []

    blocks = []
    for match in _FENCED_BLOCK_RE.finditer(text):
        label = (match.group(1) or "").lower()
        code = match.group(2)
        if label:
            language = _FENCE_LANGUAGE_ALIASES.get(label, label)
        else:
            language = detect_language(code) or "text"
        blocks.append((language, code))

    # If no code blocks, try to detect language in plain text
    if not blocks and text.strip():
        lang = detect_language(text)
        if lang:
            blocks.append((lang, text))
    return blocks
def _format_command(template: str, values: Dict[str, str]) -> List[str]:
    """Turn a command template into an argv list.

    The template is split into tokens BEFORE the placeholders are filled in,
    so a substituted path that contains spaces stays a single argument.
    Placeholders a template does not use are simply ignored.
    """
    import shlex

    return [token.format(**values) for token in shlex.split(template)]


def execute_code(code: str, language: str, timeout: int = 30) -> Dict:
    """
    Execute code in any supported language.

    The code is written to a fresh temporary directory, optionally compiled
    with the language's ``compile_command`` and then run with its
    ``run_command``. Both commands run without a shell, with the temporary
    directory as working directory.

    Template placeholders supplied: ``{file}`` (absolute source path),
    ``{executable}`` ("program"), ``{jar}`` ("program.jar") and
    ``{classname}`` (Java class to launch). The executable and jar names are
    relative on purpose: a template like "./{executable}" would otherwise
    expand to "./" followed by an absolute path, which does not exist.

    Args:
        code: Source code to run.
        language: Key of ``LANGUAGES``.
        timeout: Seconds allowed for the compile step and, separately, for
            the run step.

    Returns:
        Dict with "language", "stdout", "stderr" and "exit_code". Unsupported
        languages give exit_code 1; timeouts and unexpected errors give -1.
        The function does not raise for execution failures.
    """
    config = LANGUAGES.get(language)
    if config is None:
        return {
            "stdout": "",
            "stderr": f"Unsupported language: {language}",
            "exit_code": 1,
            "language": language,
        }

    result = {"language": language, "stdout": "", "stderr": "", "exit_code": 0}
    try:
        with tempfile.TemporaryDirectory() as tmpdir:
            source_name = f"code{config.extensions[0]}"
            class_name = "Main"

            if language == "java":
                # javac requires a public class to live in <ClassName>.java.
                class_match = re.search(r"public\s+class\s+(\w+)", code)
                if class_match:
                    class_name = class_match.group(1)
                elif "public class" not in code:
                    # Bare statements/methods: wrap them in a class.
                    code = f"public class Main {{\n{code}\n}}"
                source_name = f"{class_name}.java"

            file_path = os.path.join(tmpdir, source_name)
            with open(file_path, "w", encoding="utf-8") as f:
                f.write(code)

            values = {
                "file": file_path,
                "executable": "program",
                "classname": class_name,
                "jar": "program.jar",
            }

            # Compile if needed
            if config.compile_command:
                proc = subprocess.run(
                    _format_command(config.compile_command, values),
                    capture_output=True,
                    text=True,
                    timeout=timeout,
                    cwd=tmpdir,
                )
                if proc.returncode != 0:
                    # Keep stdout too: some compilers report errors there.
                    result["stdout"] = proc.stdout
                    result["stderr"] = proc.stderr
                    result["exit_code"] = proc.returncode
                    return result

            # Run the code
            proc = subprocess.run(
                _format_command(config.run_command, values),
                capture_output=True,
                text=True,
                timeout=timeout,
                cwd=tmpdir,
            )
            result["stdout"] = proc.stdout
            result["stderr"] = proc.stderr
            result["exit_code"] = proc.returncode
    except subprocess.TimeoutExpired:
        result["stderr"] = f"Execution timeout ({timeout}s)"
        result["exit_code"] = -1
    except Exception as e:
        # Deliberate catch-all boundary: callers expect a result dict (e.g.
        # when the interpreter is not installed), never an exception.
        result["stderr"] = str(e)
        result["exit_code"] = -1
    return result
def execute_code_docker(code: str, language: str, timeout: int = 30) -> Dict:
    """
    Execute code in Docker container for better isolation.
    Requires Docker to be installed and running.

    The source is handed to the container through an environment variable and
    written to a file by the container's shell. It is never interpolated into
    the shell command itself, so quotes, "$" or backticks in the code cannot
    break or alter the command line.

    Only ``run_command`` is executed in the container; ``compile_command`` is
    not run here. Languages whose run template needs placeholders other than
    ``{file}`` therefore end up in the local fallback.

    Args:
        code: Source code to run.
        language: Key of ``LANGUAGES``.
        timeout: Seconds to wait for the container to finish.

    Returns:
        Dict with "language", "stdout", "stderr" and "exit_code". If the
        language has no Docker image, or anything in the Docker path fails,
        the result of the local ``execute_code`` is returned instead.
    """
    if language not in LANGUAGES or not LANGUAGES[language].docker_image:
        return execute_code(code, language, timeout)

    config = LANGUAGES[language]
    try:
        import docker

        client = docker.from_env()
        container_path = f"/tmp/code{config.extensions[0]}"
        run_cmd = config.run_command.format(file=container_path)
        # printf writes the variable's content verbatim plus a final newline.
        script = f"printf '%s\\n' \"$SOURCE_CODE\" > {container_path} && {run_cmd}"

        container = client.containers.run(
            config.docker_image,
            command=["sh", "-c", script],
            environment={"SOURCE_CODE": code},
            detach=True,
            remove=False,
        )
        try:
            # Wait for completion
            status = container.wait(timeout=timeout)
            stdout = container.logs(stdout=True, stderr=False)
            stderr = container.logs(stdout=False, stderr=True)
        finally:
            # Always clean up, including when wait() gives up on the timeout;
            # force=True also stops a container that is still running.
            try:
                container.remove(force=True)
            except Exception as cleanup_error:
                log.warning(f"Could not remove Docker container: {cleanup_error}")

        return {
            "language": language,
            "stdout": stdout.decode("utf-8", errors="replace"),
            "stderr": stderr.decode("utf-8", errors="replace"),
            "exit_code": status["StatusCode"],
        }
    except Exception as e:
        # Deliberate best-effort: any Docker problem (not installed, daemon
        # down, image missing, timeout) degrades to local execution.
        log.warning(f"Docker execution failed, falling back to local: {e}")
        return execute_code(code, language, timeout)
| # Enhanced artifact type detection | |
def detect_requested_output_types_enhanced(text: str) -> Dict:
    """
    Enhanced detection that recognizes multiple programming languages.

    Rules are checked in a fixed order and the first match wins: notebook,
    Excel/CSV, Word, PDF, repository, language-specific script, generic
    script.

    For the script case the language comes from ``detect_language``, i.e. the
    best-scoring language for the whole text. Returning the first language
    with any single keyword hit would let registry order decide the result,
    so this function uses the same scoring as the rest of the module.

    Args:
        text: The user's request; may be empty or None.

    Returns:
        Dict with "requires_artifact" (bool), "artifact_type",
        "artifact_hint" and "language" (each a str or None).
    """
    def _answer(artifact_type, artifact_hint, language):
        return {
            "requires_artifact": artifact_type is not None,
            "artifact_type": artifact_type,
            "artifact_hint": artifact_hint,
            "language": language,
        }

    if not text:
        return _answer(None, None, None)

    t = text.lower()

    # Fixed-format artifacts: (type, hint, language, trigger phrases).
    document_rules = (
        ("notebook", "jupyter notebook", "python",
         ("jupyter notebook", "jupyter", "notebook", "ipynb")),
        ("excel", "Excel file", None,
         ("excel", ".xlsx", "spreadsheet", "csv")),
        ("word", "Word document", None,
         ("word document", ".docx", "docx")),
        ("pdf", "PDF document", None,
         ("pdf", "pdf file")),
    )
    for artifact_type, hint, language, triggers in document_rules:
        if any(trigger in t for trigger in triggers):
            return _answer(artifact_type, hint, language)

    # Repository
    if any(k in t for k in ["repo", "repository", "app repo", "backend", "codebase", "project"]):
        return _answer("repo", "application repository", detect_language(text))

    # Scripts - detect specific language
    lang = detect_language(text)
    if lang:
        return _answer("script", f"{LANGUAGES[lang].name} script", lang)

    # Generic script request with no recognisable language
    if any(k in t for k in ["script", "program", "code", "function", "convert", "translate"]):
        return _answer("script", "Code script", None)

    return _answer(None, None, None)
| # Update the write_script function | |
def write_script_multi_lang(code_text: str, language: Optional[str] = None, out_dir: Optional[str] = None) -> str:
    """
    Write script file with appropriate extension based on language.

    Args:
        code_text: Source code to store.
        language: Key of ``LANGUAGES``. When missing or unknown, the language
            is detected from *code_text*.
        out_dir: Target directory, created if necessary. Defaults to the
            platform's temporary directory (``/tmp`` on a typical Linux
            system) rather than a hard-coded path, so the function also works
            where ``/tmp`` does not exist.

    Returns:
        Path of the written file, named ``generated_script_<id><ext>``. The
        extension is the first one registered for the language, or ".txt"
        when no language could be determined.
    """
    target_dir = out_dir or tempfile.gettempdir()
    os.makedirs(target_dir, exist_ok=True)

    lang_id = language if language and language in LANGUAGES else detect_language(code_text)
    ext = LANGUAGES[lang_id].extensions[0] if lang_id in LANGUAGES else ".txt"

    # A short random id keeps repeated calls from overwriting each other.
    uid = uuid.uuid4().hex[:10]
    filename = os.path.join(target_dir, f"generated_script_{uid}{ext}")
    with open(filename, "w", encoding="utf-8") as f:
        f.write(code_text)
    return filename
| # Integration function for existing graph.py | |
def apply_multi_language_support():
    """
    Monkey-patch the ``graph`` module so it works with multiple languages.

    Replaces ``graph.detect_requested_output_types`` and ``graph.write_script``
    with this module's versions and installs ``graph.execute_code``, a
    dispatcher that keeps using ``graph.execute_python_code`` for Python when
    that function exists and sends everything else to ``execute_code``.

    Returns:
        True once the patches are in place.
    """
    import graph as graph_module

    # Replace functions
    replacements = (
        ("detect_requested_output_types", detect_requested_output_types_enhanced),
        ("write_script", write_script_multi_lang),
    )
    for attribute, replacement in replacements:
        setattr(graph_module, attribute, replacement)

    # Existing Python executor, if the module has one.
    original_execute = getattr(graph_module, 'execute_python_code', None)

    def execute_any_code(code: str, language: str = None) -> Dict:
        # No explicit language: detect it, defaulting to Python.
        language = language or detect_language(code) or "python"
        use_legacy_python = language == "python" and original_execute
        if not use_legacy_python:
            return execute_code(code, language)
        # Use existing Python executor for backwards compatibility
        return original_execute(code)

    setattr(graph_module, "execute_code", execute_any_code)

    log.info(f"Multi-language support enabled for {len(LANGUAGES)} languages")
    return True
| # Sample usage and tests | |
if __name__ == "__main__":
    # Manual smoke test: prints a pass/fail mark per case, asserts nothing.

    # (snippet, language the detector is expected to report)
    detection_cases = (
        ("print('Hello World')", "python"),
        ("console.log('Hello World')", "javascript"),
        ("System.out.println('Hello World');", "java"),
        ("cout << 'Hello World' << endl;", "cpp"),
        ("fmt.Println('Hello World')", "go"),
        ("puts 'Hello World'", "ruby"),
        ("SELECT * FROM users WHERE age > 18", "sql"),
    )

    print("Language Detection Tests:")
    for snippet, expected in detection_cases:
        guess = detect_language(snippet)
        mark = '✓' if guess == expected else 'X'
        print(f" {expected}: {mark} (detected: {guess})")

    # (language id, program); each needs the matching toolchain installed.
    execution_cases = (
        ("python", "print('Hello from Python')"),
        ("javascript", "console.log('Hello from JavaScript')"),
        ("ruby", "puts 'Hello from Ruby'"),
        ("go", "package main\nimport \"fmt\"\nfunc main() { fmt.Println(\"Hello from Go\") }"),
    )

    print("\nCode Execution Tests:")
    for lang, program in execution_cases:
        outcome = execute_code(program, lang)
        succeeded = outcome["exit_code"] == 0
        status = "✓" if succeeded else "X"
        output = outcome.get('stdout', '').strip()
        print(f" {lang}: {status} - {output}")