JatinAutonomousLabs committed on
Commit
e386094
·
verified ·
1 Parent(s): 045bb0b

Create multi_language_support.py

Browse files
Files changed (1) hide show
  1. multi_language_support.py +553 -0
multi_language_support.py ADDED
@@ -0,0 +1,553 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # multi_language_support.py
2
+ """
3
+ Multi-Language Support Module for AI Lab
4
+ Extends the system to support all major programming languages
5
+ """
6
+
7
+ import os
8
+ import re
9
+ import json
10
+ import subprocess
11
+ import tempfile
12
+ import uuid
13
+ from typing import Dict, Optional, Tuple, List
14
+ from dataclasses import dataclass
15
+ import logging
16
+
17
+ log = logging.getLogger(__name__)
18
+
19
+ # Language configurations
20
@dataclass
class LanguageConfig:
    """Static description of one supported language: how to recognise it and how to run it."""
    # Human-readable display name (e.g. "Python"); used when building artifact hints.
    name: str
    # File extensions including the leading dot; the first entry is the one
    # used when a source file is written to disk.
    extensions: List[str]
    # Substrings whose presence in a text counts as evidence for this language.
    keywords: List[str]
    # Command template for running a program, filled in with str.format
    # placeholders such as {file}, {classname} or {executable}.
    run_command: str
    # Optional command template for a compile step executed before run_command.
    compile_command: Optional[str] = None
    # Optional Docker image used by the container-based executor.
    docker_image: Optional[str] = None
    # Presumably marks languages that have an interactive REPL; not read by
    # any code visible in this module -- TODO confirm against callers.
    repl_available: bool = False
29
+
30
# Registry of supported languages, keyed by the lowercase language id that the
# detection and execution helpers in this module pass around.
#
# Command templates use str.format placeholders: {file} (source path),
# {classname} (Java), {executable} (C/C++), {jar} (Kotlin).
#
# NOTE(review): consumers lowercase the text before testing `keyword in text`,
# so mixed-case keywords (e.g. "System.out.println") only match if the
# consumer lowercases the keyword as well.
# NOTE(review): many keywords are short, common substrings ("go", "py", "$",
# "end", "let"), which makes plain substring matching prone to false positives.
LANGUAGES = {
    "python": LanguageConfig(
        name="Python",
        extensions=[".py"],
        keywords=["python", "py", "pip", "import", "def", "pandas", "numpy"],
        run_command="python3 {file}",
        docker_image="python:3.11",
        repl_available=True
    ),
    "javascript": LanguageConfig(
        name="JavaScript",
        extensions=[".js", ".mjs"],
        keywords=["javascript", "js", "node", "npm", "const", "let", "var", "function", "console.log"],
        run_command="node {file}",
        docker_image="node:20",
        repl_available=True
    ),
    "typescript": LanguageConfig(
        name="TypeScript",
        extensions=[".ts"],
        keywords=["typescript", "ts", "interface", "type", "enum"],
        run_command="npx ts-node {file}",
        compile_command="tsc {file}",
        docker_image="node:20"
    ),
    "java": LanguageConfig(
        name="Java",
        extensions=[".java"],
        keywords=["java", "class", "public static void main", "System.out.println"],
        # {classname} is the class to launch, not a file path.
        run_command="java {classname}",
        compile_command="javac {file}",
        docker_image="openjdk:17"
    ),
    "csharp": LanguageConfig(
        name="C#",
        extensions=[".cs"],
        keywords=["c#", "csharp", "dotnet", "using System", "namespace", "Console.WriteLine"],
        # NOTE(review): neither command references {file}; presumably a .NET
        # project must already exist in the working directory -- confirm.
        run_command="dotnet run",
        compile_command="dotnet build",
        docker_image="mcr.microsoft.com/dotnet/sdk:7.0"
    ),
    "cpp": LanguageConfig(
        name="C++",
        extensions=[".cpp", ".cc", ".cxx"],
        keywords=["c++", "cpp", "iostream", "std::", "cout", "#include"],
        # The "./" prefix means {executable} is expected to be a name relative
        # to the working directory.
        run_command="./{executable}",
        compile_command="g++ -o {executable} {file}",
        docker_image="gcc:latest"
    ),
    "c": LanguageConfig(
        name="C",
        extensions=[".c"],
        keywords=["c language", "printf", "scanf", "#include <stdio.h>"],
        run_command="./{executable}",
        compile_command="gcc -o {executable} {file}",
        docker_image="gcc:latest"
    ),
    "go": LanguageConfig(
        name="Go",
        extensions=[".go"],
        keywords=["golang", "go", "package main", "fmt.Println"],
        run_command="go run {file}",
        docker_image="golang:1.21"
    ),
    "rust": LanguageConfig(
        name="Rust",
        extensions=[".rs"],
        keywords=["rust", "cargo", "println!", "fn main"],
        # NOTE(review): `cargo run` does not reference {file} and presumably
        # needs a Cargo project in the working directory, while the compile
        # step builds a single file with rustc -- confirm these agree.
        run_command="cargo run",
        compile_command="rustc {file}",
        docker_image="rust:latest"
    ),
    "ruby": LanguageConfig(
        name="Ruby",
        extensions=[".rb"],
        keywords=["ruby", "puts", "def", "end", "gem"],
        run_command="ruby {file}",
        docker_image="ruby:3.2",
        repl_available=True
    ),
    "php": LanguageConfig(
        name="PHP",
        extensions=[".php"],
        keywords=["php", "<?php", "echo", "function", "$"],
        run_command="php {file}",
        docker_image="php:8.2",
        repl_available=True
    ),
    "r": LanguageConfig(
        name="R",
        # Both cases are listed so the extension matches however the caller
        # normalises it.
        extensions=[".r", ".R"],
        keywords=["r language", "rlang", "ddply", "ggplot", "data.frame", "<-"],
        run_command="Rscript {file}",
        docker_image="r-base:latest",
        repl_available=True
    ),
    "julia": LanguageConfig(
        name="Julia",
        extensions=[".jl"],
        keywords=["julia", "println", "function", "end"],
        run_command="julia {file}",
        docker_image="julia:latest",
        repl_available=True
    ),
    "swift": LanguageConfig(
        name="Swift",
        extensions=[".swift"],
        keywords=["swift", "print", "func", "var", "let"],
        run_command="swift {file}",
        docker_image="swift:latest"
    ),
    "kotlin": LanguageConfig(
        name="Kotlin",
        extensions=[".kt"],
        keywords=["kotlin", "fun", "val", "var", "println"],
        # NOTE(review): the compile step writes {jar}, but the run step is
        # given the source file rather than that jar -- confirm intent.
        run_command="kotlin {file}",
        compile_command="kotlinc {file} -include-runtime -d {jar}",
        docker_image="zenika/kotlin"
    ),
    "scala": LanguageConfig(
        name="Scala",
        extensions=[".scala"],
        keywords=["scala", "def", "val", "var", "println", "object"],
        run_command="scala {file}",
        docker_image="hseeberger/scala-sbt:latest"
    ),
    "perl": LanguageConfig(
        name="Perl",
        extensions=[".pl"],
        keywords=["perl", "print", "my", "sub", "use"],
        run_command="perl {file}",
        docker_image="perl:latest",
        repl_available=True
    ),
    "lua": LanguageConfig(
        name="Lua",
        extensions=[".lua"],
        keywords=["lua", "print", "function", "local", "end"],
        run_command="lua {file}",
        docker_image="nickblah/lua:latest"
    ),
    "haskell": LanguageConfig(
        name="Haskell",
        extensions=[".hs"],
        keywords=["haskell", "main", "putStrLn", "import"],
        run_command="runhaskell {file}",
        docker_image="haskell:latest"
    ),
    "matlab": LanguageConfig(
        name="MATLAB",
        extensions=[".m"],
        keywords=["matlab", "disp", "function", "end", "plot"],
        run_command="octave {file}",  # Using Octave as MATLAB alternative
        docker_image="gnuoctave/octave:latest"
    ),
    "sql": LanguageConfig(
        name="SQL",
        extensions=[".sql"],
        keywords=["sql", "select", "from", "where", "insert", "update", "create table"],
        # NOTE(review): "<" is shell redirection and only has that meaning
        # when the command is interpreted by a shell.
        # NOTE(review): the run command uses sqlite3 while the image is
        # postgres -- confirm the image actually provides sqlite3.
        run_command="sqlite3 < {file}",
        docker_image="postgres:latest"
    ),
    "bash": LanguageConfig(
        name="Bash",
        extensions=[".sh", ".bash"],
        keywords=["bash", "shell", "script", "echo", "#!/bin/bash"],
        run_command="bash {file}",
        docker_image="bash:latest",
        repl_available=True
    ),
    "powershell": LanguageConfig(
        name="PowerShell",
        extensions=[".ps1"],
        keywords=["powershell", "Write-Host", "function", "param"],
        run_command="pwsh {file}",
        docker_image="mcr.microsoft.com/powershell:latest"
    )
}
208
+
209
def detect_language(text: str, filename: Optional[str] = None) -> Optional[str]:
    """
    Detect the programming language of a snippet from its filename or content.

    A recognised filename extension wins outright. Otherwise every language
    in LANGUAGES is scored by how many of its keywords occur in the text
    (case-insensitive substring match) and the best-scoring one is returned.

    Args:
        text: Source code or natural-language request to inspect. May be
            empty or None, in which case only the filename can match.
        filename: Optional file name whose extension is checked first.

    Returns:
        The language id (a key of LANGUAGES), or None when nothing matched.
    """
    text_lower = text.lower() if text else ""

    # Check filename extension first
    if filename:
        ext = os.path.splitext(filename)[1].lower()
        if ext:
            for lang_id, config in LANGUAGES.items():
                # Lowercase both sides so the comparison does not depend on
                # how an extension happens to be spelled in the registry.
                if any(ext == known.lower() for known in config.extensions):
                    return lang_id

    # Count keyword matches for each language
    scores = {}
    for lang_id, config in LANGUAGES.items():
        # The text was lowercased above, so the keyword must be lowercased
        # too; otherwise mixed-case keywords such as "System.out.println"
        # can never match. Empty keywords are skipped because "" is a
        # substring of every text.
        score = sum(
            1
            for keyword in config.keywords
            if keyword and keyword.lower() in text_lower
        )
        if score > 0:
            scores[lang_id] = score

    if not scores:
        return None

    # max() returns the first maximal key, so ties are resolved in favour of
    # the language declared earliest in LANGUAGES.
    return max(scores, key=scores.get)
237
+
238
def extract_code_blocks_multi_lang(text: str) -> List[Tuple[str, str]]:
    """
    Extract fenced code blocks and their languages from markdown text.

    For each ```lang ... ``` fence the language comes from the fence's info
    string (normalised through a small alias table). A fence without an info
    string is classified with detect_language(), defaulting to "text". If
    the text contains no fences at all, the whole text is treated as a single
    block when detect_language() recognises it.

    Args:
        text: Markdown (or plain) text to scan. Empty or None yields [].

    Returns:
        A list of (language, code) tuples in document order.
    """
    if not text:
        return []

    # Short fence labels mapped to the language ids used in this module.
    aliases = {
        "js": "javascript",
        "ts": "typescript",
        "py": "python",
        "cs": "csharp",
        "c#": "csharp",
        "cpp": "cpp",
        "c++": "cpp",
        "rb": "ruby",
        "rs": "rust",
        "sh": "bash",
        "shell": "bash",
        "golang": "go",
        "kt": "kotlin",
        "pl": "perl",
        "hs": "haskell",
        "jl": "julia",
        "ps1": "powershell",
        "pwsh": "powershell",
    }

    # Match ```language\ncode\n```. The info string may contain '+', '#',
    # '.' or '-' (c++, c#, objective-c); plain \w+ rejects those, which made
    # the whole fence fail to match.
    pattern = r"```([\w+#.-]+)?\s*\n(.*?)\n```"

    blocks = []
    for match in re.finditer(pattern, text, re.DOTALL):
        lang_hint = (match.group(1) or "").lower()
        code = match.group(2)

        if lang_hint:
            # Unknown hints are passed through unchanged (lowercased).
            detected_lang = aliases.get(lang_hint, lang_hint)
        else:
            detected_lang = detect_language(code) or "text"

        blocks.append((detected_lang, code))

    # If no code blocks, try to detect language in plain text
    if not blocks and text.strip():
        lang = detect_language(text)
        if lang:
            blocks.append((lang, text))

    return blocks
281
+
282
def _prepare_java_source(code: str) -> Tuple[str, str]:
    """Return (source, class_name) for a Java snippet, wrapping it in `Main` if it has no public class."""
    # Allow modifiers between `public` and `class` (e.g. `public final class`).
    class_match = re.search(
        r"public\s+(?:(?:final|abstract|strictfp)\s+)*class\s+(\w+)", code
    )
    if class_match:
        return code, class_match.group(1)
    # Wrap code in a class if needed
    if "public class" not in code:
        code = f"public class Main {{\n{code}\n}}"
    return code, "Main"


def _run_command_template(template: str, params: Dict[str, str], timeout: int, cwd: str):
    """Run a command template without a shell and return the CompletedProcess.

    Placeholders are substituted per token, after splitting, so a substituted
    path containing spaces stays a single argument. A `< file` pair in the
    template is emulated by feeding that file to the process on stdin, since
    no shell is involved to interpret the redirection.
    """
    tokens = template.split()
    stdin_path = None
    if "<" in tokens:
        idx = tokens.index("<")
        if idx + 1 < len(tokens):
            stdin_path = tokens[idx + 1].format(**params)
        tokens = tokens[:idx] + tokens[idx + 2:]

    argv = [token.format(**params) for token in tokens]

    if stdin_path is None:
        return subprocess.run(
            argv, capture_output=True, text=True, timeout=timeout, cwd=cwd
        )
    with open(stdin_path, "r", encoding="utf-8") as stdin_file:
        return subprocess.run(
            argv,
            stdin=stdin_file,
            capture_output=True,
            text=True,
            timeout=timeout,
            cwd=cwd,
        )


def execute_code(code: str, language: str, timeout: int = 30) -> Dict:
    """
    Execute code in any supported language on the local machine.

    The code is written to a fresh temporary directory, optionally compiled
    with the language's compile_command, then run with its run_command. Both
    steps run without a shell, with the temporary directory as working
    directory, and each step gets its own `timeout`.

    NOTE: the code runs directly on the host with the caller's privileges;
    there is no sandboxing in this function.

    Args:
        code: Source code to run.
        language: Language id; must be a key of LANGUAGES.
        timeout: Per-step timeout in seconds.

    Returns:
        Dict with keys "language", "stdout", "stderr" and "exit_code".
        exit_code is 1 for an unsupported language, the compiler's return
        code if compilation fails, the program's return code otherwise, and
        -1 on timeout or any other error (details in "stderr").
    """
    if language not in LANGUAGES:
        return {
            "stdout": "",
            "stderr": f"Unsupported language: {language}",
            "exit_code": 1,
            "language": language
        }

    config = LANGUAGES[language]
    result = {"language": language, "stdout": "", "stderr": "", "exit_code": 0}

    try:
        with tempfile.TemporaryDirectory() as tmpdir:
            file_name = f"code{config.extensions[0]}"
            class_name = "Main"

            # Java requires the file name to match the public class name.
            if language == "java":
                code, class_name = _prepare_java_source(code)
                file_name = f"{class_name}.java"

            file_path = os.path.join(tmpdir, file_name)
            with open(file_path, "w", encoding="utf-8") as f:
                f.write(code)

            # Every placeholder any template may use. Build outputs are bare
            # names resolved against cwd=tmpdir: a template such as
            # "./{executable}" must not receive an absolute path, because
            # "./" + "/abs/path" is a relative path that does not exist.
            params = {
                "file": file_path,
                "executable": "program",
                "classname": class_name,
                "jar": "program.jar",
            }

            # Compile if needed
            if config.compile_command:
                proc = _run_command_template(
                    config.compile_command, params, timeout, tmpdir
                )
                if proc.returncode != 0:
                    result["stderr"] = proc.stderr
                    result["exit_code"] = proc.returncode
                    return result

            # Run the code
            proc = _run_command_template(config.run_command, params, timeout, tmpdir)

            result["stdout"] = proc.stdout
            result["stderr"] = proc.stderr
            result["exit_code"] = proc.returncode

    except subprocess.TimeoutExpired:
        result["stderr"] = f"Execution timeout ({timeout}s)"
        result["exit_code"] = -1
    except KeyError as e:
        # A template referenced a placeholder this function does not supply.
        result["stderr"] = f"Unknown placeholder {e} in command template for {language}"
        result["exit_code"] = -1
    except Exception as e:
        # Best-effort boundary: missing interpreter, I/O errors, etc. are
        # reported to the caller rather than raised.
        result["stderr"] = str(e)
        result["exit_code"] = -1

    return result
376
+
377
def execute_code_docker(code: str, language: str, timeout: int = 30) -> Dict:
    """
    Execute code in a Docker container for better isolation.

    Requires the `docker` Python package and a running Docker daemon. If the
    language has no docker_image, or the container cannot be created (package
    missing, daemon unreachable, run command needs placeholders other than
    {file}), execution falls back to the local execute_code().

    Once a container has been started, failures (including a timeout) are
    reported in the result instead of falling back, so the same code is never
    executed twice.

    Args:
        code: Source code to run.
        language: Language id; should be a key of LANGUAGES.
        timeout: Seconds to wait for the container to finish.

    Returns:
        Dict with keys "language", "stdout", "stderr" and "exit_code"
        (-1 when the container run itself failed or timed out).
    """
    if language not in LANGUAGES or not LANGUAGES[language].docker_image:
        return execute_code(code, language, timeout)

    config = LANGUAGES[language]
    container_path = f"/tmp/code{config.extensions[0]}"

    try:
        import docker

        run_cmd = config.run_command.format(file=container_path)
        # The source is passed through an environment variable instead of
        # being interpolated into the shell command: quotes, `$` or backticks
        # in the code can then neither break the command nor inject into it.
        shell_script = f"printf '%s\\n' \"$SOURCE_CODE\" > {container_path} && {run_cmd}"

        client = docker.from_env()
        container = client.containers.run(
            config.docker_image,
            command=["sh", "-c", shell_script],
            environment={"SOURCE_CODE": code},
            detach=True,
            remove=False
        )
    except Exception as e:
        log.warning("Docker execution failed, falling back to local: %s", e)
        return execute_code(code, language, timeout)

    try:
        # Wait for completion
        status = container.wait(timeout=timeout)
        stdout = container.logs(stdout=True, stderr=False)
        stderr = container.logs(stdout=False, stderr=True)
        return {
            "language": language,
            "stdout": stdout.decode("utf-8", errors="replace"),
            "stderr": stderr.decode("utf-8", errors="replace"),
            "exit_code": status["StatusCode"]
        }
    except Exception as e:
        # The code has already started running inside the container, so do
        # not fall back to a second, local run.
        log.warning("Docker execution failed after container start: %s", e)
        return {
            "language": language,
            "stdout": "",
            "stderr": f"Docker execution failed: {e}",
            "exit_code": -1
        }
    finally:
        # Always clean up, also on timeout; force=True stops a container
        # that is still running.
        try:
            container.remove(force=True)
        except Exception as cleanup_error:
            log.warning("Could not remove container: %s", cleanup_error)
415
+
416
+ # Enhanced artifact type detection
417
def detect_requested_output_types_enhanced(text: str) -> Dict:
    """
    Classify which kind of artifact a natural-language request asks for.

    Checks run in priority order: notebook, Excel/CSV, Word, PDF, repository,
    language-specific script, generic script. The first match wins.

    Args:
        text: The user's request. Empty or None means "no artifact".

    Returns:
        Dict with keys:
            "requires_artifact": bool
            "artifact_type": "notebook" | "excel" | "word" | "pdf" | "repo" |
                "script" | None
            "artifact_hint": short human-readable description, or None
            "language": language id from LANGUAGES, or None
    """

    def _result(requires: bool, artifact_type=None, hint=None, language=None) -> Dict:
        # Build a fresh dict on every call so callers may mutate the result.
        return {
            "requires_artifact": requires,
            "artifact_type": artifact_type,
            "artifact_hint": hint,
            "language": language,
        }

    if not text:
        return _result(False)

    t = text.lower()

    # Notebooks
    if any(k in t for k in ("jupyter", "notebook", "ipynb")):
        return _result(True, "notebook", "jupyter notebook", "python")

    # Excel/CSV
    if any(k in t for k in ("excel", ".xlsx", "spreadsheet", "csv")):
        return _result(True, "excel", "Excel file")

    # Documents
    if any(k in t for k in ("word document", "docx")):
        return _result(True, "word", "Word document")

    if "pdf" in t:
        return _result(True, "pdf", "PDF document")

    # Repository. "repo" must stand as its own word (optionally plural);
    # as a bare substring it also matches unrelated words such as "report".
    if re.search(r"\brepos?\b|repositor|backend|codebase|project", t):
        return _result(True, "repo", "application repository", detect_language(text))

    # Scripts - detect specific language. Delegating to detect_language()
    # picks the best-scoring language rather than the first one in LANGUAGES
    # that happens to share a keyword with the text.
    lang = detect_language(text)
    if lang:
        return _result(True, "script", f"{LANGUAGES[lang].name} script", lang)

    # Generic script request. No language keyword matched above, so the
    # language is unknown here.
    if any(k in t for k in ("script", "program", "code", "function", "convert", "translate")):
        return _result(True, "script", "Code script", None)

    return _result(False)
467
+
468
+ # Update the write_script function
469
def write_script_multi_lang(code_text: str, language: Optional[str] = None, out_dir: Optional[str] = None) -> str:
    """
    Save code to a uniquely named file whose extension matches its language.

    The extension is the first one registered for `language`; if `language`
    is missing or unknown, the language is guessed from the code, and ".txt"
    is used when that fails as well.

    Args:
        code_text: The source code to write.
        language: Optional language id (a key of LANGUAGES).
        out_dir: Target directory, created if absent. Defaults to "/tmp".

    Returns:
        Path of the file that was written.
    """
    target_dir = out_dir or "/tmp"
    os.makedirs(target_dir, exist_ok=True)

    # Trust an explicit, known language; otherwise fall back to detection.
    if language and language in LANGUAGES:
        lang_id = language
    else:
        lang_id = detect_language(code_text)

    suffix = ".txt"
    if lang_id and lang_id in LANGUAGES:
        suffix = LANGUAGES[lang_id].extensions[0]

    # A short random token keeps successive scripts from overwriting each other.
    token = uuid.uuid4().hex[:10]
    script_path = os.path.join(target_dir, f"generated_script_{token}{suffix}")

    with open(script_path, "w", encoding="utf-8") as handle:
        handle.write(code_text)

    return script_path
493
+
494
+ # Integration function for existing graph.py
495
def apply_multi_language_support():
    """
    Monkey-patch the `graph` module so it handles multiple languages.

    Replaces graph.detect_requested_output_types and graph.write_script with
    the multi-language versions from this module, and installs
    graph.execute_code, a dispatcher that keeps using graph's own
    execute_python_code for Python when that function exists.

    Returns:
        True once the patches have been applied.
    """
    import graph as graph_module

    # Captured before patching so Python keeps its existing executor.
    legacy_python_runner = getattr(graph_module, 'execute_python_code', None)

    def execute_any_code(code: str, language: str = None) -> Dict:
        # Without an explicit language, guess it and default to Python.
        language = language or detect_language(code) or "python"

        use_legacy = language == "python" and legacy_python_runner
        if use_legacy:
            # Use existing Python executor for backwards compatibility
            return legacy_python_runner(code)
        return execute_code(code, language)

    replacements = {
        "detect_requested_output_types": detect_requested_output_types_enhanced,
        "write_script": write_script_multi_lang,
        "execute_code": execute_any_code,
    }
    for attr_name, replacement in replacements.items():
        setattr(graph_module, attr_name, replacement)

    log.info(f"Multi-language support enabled for {len(LANGUAGES)} languages")
    return True
522
+
523
+ # Sample usage and tests
524
if __name__ == "__main__":
    # Manual smoke test: prints a check mark or X per case; nothing is asserted.

    # (snippet, language the detector is expected to report)
    detection_cases = [
        ("print('Hello World')", "python"),
        ("console.log('Hello World')", "javascript"),
        ("System.out.println('Hello World');", "java"),
        ("cout << 'Hello World' << endl;", "cpp"),
        ("fmt.Println('Hello World')", "go"),
        ("puts 'Hello World'", "ruby"),
        ("SELECT * FROM users WHERE age > 18", "sql")
    ]

    print("Language Detection Tests:")
    for snippet, want in detection_cases:
        got = detect_language(snippet)
        mark = '✓' if got == want else 'X'
        print(f" {want}: {mark} (detected: {got})")

    # (language id, program) pairs run through the local executor; each one
    # needs the matching interpreter/toolchain installed to succeed.
    print("\nCode Execution Tests:")
    execution_cases = [
        ("python", "print('Hello from Python')"),
        ("javascript", "console.log('Hello from JavaScript')"),
        ("ruby", "puts 'Hello from Ruby'"),
        ("go", "package main\nimport \"fmt\"\nfunc main() { fmt.Println(\"Hello from Go\") }"),
    ]

    for lang_id, program in execution_cases:
        outcome = execute_code(program, lang_id)
        if outcome["exit_code"] == 0:
            status = "✓"
        else:
            status = "X"
        print(f" {lang_id}: {status} - {outcome.get('stdout', '').strip()}")