ai-code-detector / language_config.py
Karim Krklec
'first'
dcce181
Raw
History Blame Contribute Delete
12 kB
import re
import tree_sitter_python
import tree_sitter_javascript
import tree_sitter_java
import tree_sitter_cpp
import tree_sitter_c
import tree_sitter_go
import tree_sitter_rust
import tree_sitter_ruby
import tree_sitter_typescript
# Comment/doc-comment patterns shared by several of the entries below.
_HASH_LINE_COMMENT = r"^\s*#"
_SLASH_LINE_COMMENT = r"^\s*//"
_SLASH_STAR_BLOCK = ("/*", "*/")
_SLASH_STAR_STAR_DOC = r"/\*\*[\s\S]*?\*/"

# Per-language settings, keyed by lower-case language name.
#
# Every entry carries the same keys:
#   ts_module         - the imported tree_sitter_<language> module
#   extensions        - file-name suffixes that map to this language
#   inline_comment    - regex matching a line that begins with a line comment
#   block_comment     - (opening, closing) delimiter pair, or None
#   docstring_pattern - regex for documentation comments, or None
#   node_types        - category name -> list of syntax-node type names
#                       (presumably tree-sitter node types; the consumer is
#                       not visible in this module)
#
# Insertion order matters: the lookup helpers in this module iterate the dict
# in order and stop at the first match.
LANGUAGE_CONFIGS: dict = {
    # ── Python ────────────────────────────────────────────────────────────
    "python": {
        "ts_module": tree_sitter_python,
        "extensions": [".py"],
        "inline_comment": _HASH_LINE_COMMENT,
        "block_comment": None,
        "docstring_pattern": r'"""[\s\S]*?"""|\'\'\'[\s\S]*?\'\'\'',
        "node_types": {
            "function": ["function_definition"],
            "class": ["class_definition"],
            "if": ["if_statement"],
            "for": ["for_statement"],
            "while": ["while_statement"],
            "try": ["try_statement"],
            "import": [
                "import_statement",
                "import_from_statement",
            ],
            "identifier": ["identifier"],
            "lambda": ["lambda"],
        },
    },
    # ── JavaScript ────────────────────────────────────────────────────────
    "javascript": {
        "ts_module": tree_sitter_javascript,
        "extensions": [
            ".js",
            ".mjs",
            ".cjs",
        ],
        "inline_comment": _SLASH_LINE_COMMENT,
        "block_comment": _SLASH_STAR_BLOCK,
        "docstring_pattern": _SLASH_STAR_STAR_DOC,
        "node_types": {
            "function": [
                "function_declaration",
                "arrow_function",
                "function_expression",
                "method_definition",
            ],
            "class": [
                "class_declaration",
                "class",
            ],
            "if": ["if_statement"],
            "for": [
                "for_statement",
                "for_in_statement",
            ],
            "while": ["while_statement"],
            "try": ["try_statement"],
            "import": ["import_statement"],
            "identifier": ["identifier"],
            "lambda": ["arrow_function"],
        },
    },
    # ── TypeScript ────────────────────────────────────────────────────────
    "typescript": {
        "ts_module": tree_sitter_typescript,
        "extensions": [".ts"],
        "inline_comment": _SLASH_LINE_COMMENT,
        "block_comment": _SLASH_STAR_BLOCK,
        "docstring_pattern": _SLASH_STAR_STAR_DOC,
        "node_types": {
            "function": [
                "function_declaration",
                "arrow_function",
                "function_expression",
                "method_definition",
            ],
            "class": ["class_declaration"],
            "if": ["if_statement"],
            "for": [
                "for_statement",
                "for_in_statement",
            ],
            "while": ["while_statement"],
            "try": ["try_statement"],
            "import": ["import_statement"],
            "identifier": ["identifier"],
            "lambda": ["arrow_function"],
        },
    },
    # ── Java ──────────────────────────────────────────────────────────────
    "java": {
        "ts_module": tree_sitter_java,
        "extensions": [".java"],
        "inline_comment": _SLASH_LINE_COMMENT,
        "block_comment": _SLASH_STAR_BLOCK,
        "docstring_pattern": _SLASH_STAR_STAR_DOC,
        "node_types": {
            "function": [
                "method_declaration",
                "constructor_declaration",
            ],
            "class": [
                "class_declaration",
                "interface_declaration",
                "enum_declaration",
            ],
            "if": ["if_statement"],
            "for": [
                "for_statement",
                "enhanced_for_statement",
            ],
            "while": ["while_statement"],
            "try": ["try_statement"],
            "import": ["import_declaration"],
            "identifier": ["identifier"],
            "lambda": ["lambda_expression"],
        },
    },
    # ── C ─────────────────────────────────────────────────────────────────
    "c": {
        "ts_module": tree_sitter_c,
        "extensions": [
            ".c",
            ".h",
        ],
        "inline_comment": _SLASH_LINE_COMMENT,
        "block_comment": _SLASH_STAR_BLOCK,
        "docstring_pattern": _SLASH_STAR_STAR_DOC,
        "node_types": {
            "function": ["function_definition"],
            "class": [
                "struct_specifier",
                "union_specifier",
            ],
            "if": ["if_statement"],
            "for": ["for_statement"],
            "while": ["while_statement"],
            "try": [],  # no node types listed for this category
            "import": ["preproc_include"],
            "identifier": ["identifier"],
            "lambda": [],  # no node types listed for this category
        },
    },
    # ── C++ ───────────────────────────────────────────────────────────────
    "cpp": {
        "ts_module": tree_sitter_cpp,
        "extensions": [
            ".cpp",
            ".cc",
            ".cxx",
            ".hpp",
        ],
        "inline_comment": _SLASH_LINE_COMMENT,
        "block_comment": _SLASH_STAR_BLOCK,
        "docstring_pattern": _SLASH_STAR_STAR_DOC,
        "node_types": {
            "function": ["function_definition"],
            "class": [
                "class_specifier",
                "struct_specifier",
            ],
            "if": ["if_statement"],
            "for": [
                "for_statement",
                "for_range_loop",
            ],
            "while": ["while_statement"],
            "try": ["try_statement"],
            "import": ["preproc_include"],
            "identifier": ["identifier"],
            "lambda": ["lambda_expression"],
        },
    },
    # ── Go ────────────────────────────────────────────────────────────────
    "go": {
        "ts_module": tree_sitter_go,
        "extensions": [".go"],
        "inline_comment": _SLASH_LINE_COMMENT,
        "block_comment": _SLASH_STAR_BLOCK,
        "docstring_pattern": None,
        "node_types": {
            "function": [
                "function_declaration",
                "method_declaration",
            ],
            "class": ["type_declaration"],
            "if": ["if_statement"],
            "for": ["for_statement"],
            # Same node type as "for": Go spells every loop with `for`.
            "while": ["for_statement"],
            "try": [],  # no node types listed for this category
            "import": ["import_declaration"],
            "identifier": ["identifier"],
            "lambda": ["func_literal"],
        },
    },
    # ── Rust ──────────────────────────────────────────────────────────────
    "rust": {
        "ts_module": tree_sitter_rust,
        "extensions": [".rs"],
        "inline_comment": _SLASH_LINE_COMMENT,
        "block_comment": _SLASH_STAR_BLOCK,
        "docstring_pattern": r"///.*",
        "node_types": {
            "function": ["function_item"],
            "class": [
                "struct_item",
                "impl_item",
                "trait_item",
            ],
            "if": ["if_expression"],
            "for": ["for_expression"],
            "while": ["while_expression"],
            # NOTE(review): `match` stands in for "try" here, presumably as
            # the closest error-handling construct - confirm with consumers.
            "try": ["match_expression"],
            "import": ["use_declaration"],
            "identifier": ["identifier"],
            "lambda": ["closure_expression"],
        },
    },
    # ── Ruby ──────────────────────────────────────────────────────────────
    "ruby": {
        "ts_module": tree_sitter_ruby,
        "extensions": [".rb"],
        "inline_comment": _HASH_LINE_COMMENT,
        "block_comment": ("=begin", "=end"),
        "docstring_pattern": None,
        "node_types": {
            "function": [
                "method",
                "singleton_method",
            ],
            "class": [
                "class",
                "module",
            ],
            "if": [
                "if",
                "unless",
            ],
            "for": ["for"],
            "while": [
                "while",
                "until",
            ],
            "try": ["begin"],
            "import": [],  # no node types listed for this category
            "identifier": ["identifier"],
            "lambda": ["lambda"],
        },
    },
}
def get_config(language: str) -> dict:
    """Return the configuration entry for *language*.

    The name is matched case-insensitively and with surrounding whitespace
    ignored, so ``" Python "`` resolves to the ``"python"`` entry.

    Args:
        language: Name of the language to look up.

    Returns:
        The dict stored in ``LANGUAGE_CONFIGS`` for that language. It is the
        stored object itself, not a copy.

    Raises:
        ValueError: If the normalized name is not a key of
            ``LANGUAGE_CONFIGS``. The message (in Croatian) echoes the name
            as given and lists the supported languages alphabetically.
    """
    key = language.lower().strip()
    if key not in LANGUAGE_CONFIGS:
        supported = ", ".join(sorted(LANGUAGE_CONFIGS.keys()))
        # The message previously contained mojibake ("podrΕΎan"): the UTF-8
        # bytes of "ž" decoded with the wrong code page. Restored to "ž".
        raise ValueError(
            f"Jezik '{language}' nije podržan.\n"
            f"Podržani jezici: {supported}"
        )
    return LANGUAGE_CONFIGS[key]
def detect_language_from_extension(filename: str):
    """Guess the language of a file from the suffix of its name.

    The comparison is case-insensitive. Languages are tried in the order they
    appear in ``LANGUAGE_CONFIGS`` and the first one with a matching
    extension wins.

    Args:
        filename: File name or path to inspect.

    Returns:
        The matching language key, or ``None`` when no configured extension
        matches.
    """
    lowered = filename.lower()
    return next(
        (
            name
            for name, settings in LANGUAGE_CONFIGS.items()
            # str.endswith accepts a tuple and is true if any suffix matches.
            if lowered.endswith(tuple(settings["extensions"]))
        ),
        None,
    )
def _hint(weight, languages, *patterns, flags=0):
    """Build one scoring rule: every pattern must match to credit *languages*."""
    return weight, languages, tuple(re.compile(p, flags) for p in patterns)


# Scoring rules for detect_language_from_code. Each rule adds its weight to
# every listed language when all of its patterns are found in the source text.
_LANGUAGE_HINTS = (
    # Python
    _hint(3, ("python",), r"\bdef\s+\w+\s*\("),
    _hint(2, ("python",), r'"""'),
    _hint(2, ("python",), r"\bself\b"),
    _hint(1, ("python",), r"\bprint\s*\("),
    _hint(1, ("python",), r":\s*$", flags=re.MULTILINE),
    # Java
    _hint(4, ("java",), r"\bpublic\s+class\b"),
    _hint(3, ("java",), r"\bSystem\.out\b"),
    _hint(2, ("java",), r"\bpublic\s+static\b"),
    _hint(2, ("java",), r"\bvoid\s+main\b"),
    # JavaScript
    _hint(2, ("javascript",), r"\bconst\s+\w+\s*="),
    _hint(3, ("javascript",), r"=>\s*[{(]"),
    _hint(3, ("javascript",), r"\bconsole\.log\b"),
    # TypeScript
    _hint(4, ("typescript",), r":\s*(string|number|boolean|void)\b"),
    _hint(4, ("typescript",), r"\binterface\s+\w+"),
    # C / C++ (an #include line counts for both)
    _hint(3, ("cpp", "c"), r"#include\s*[<\"]"),
    _hint(4, ("cpp",), r"\bstd::"),
    _hint(4, ("cpp",), r"\bcout\b"),
    _hint(4, ("cpp",), r"\bcin\b"),
    _hint(4, ("cpp",), r"\busing\s+namespace\b"),
    _hint(3, ("cpp",), r"\btemplate\s*<"),
    _hint(3, ("cpp",), r"\bvector\s*<"),
    _hint(2, ("cpp",), r"\bstring\b", r"#include"),
    _hint(3, ("c",), r"\bprintf\s*\("),
    _hint(3, ("c",), r"\bscanf\s*\("),
    _hint(3, ("c",), r"\bmalloc\s*\("),
    # Go
    _hint(4, ("go",), r"\bfunc\s+\w+\s*\("),
    _hint(4, ("go",), r"\bpackage\s+\w+"),
    _hint(3, ("go",), r"\bfmt\.Print"),
    # Rust
    _hint(4, ("rust",), r"\bfn\s+\w+\s*\("),
    _hint(4, ("rust",), r"\blet\s+mut\b"),
    _hint(3, ("rust",), r"\bprintln!\s*\("),
    # Ruby
    _hint(3, ("ruby",), r"\bdef\s+\w+", r"\bend\b"),
    _hint(3, ("ruby",), r"\bputs\s+"),
)


def detect_language_from_code(code: str) -> str:
    """Guess the programming language of a source snippet.

    Each language in ``LANGUAGE_CONFIGS`` starts at zero points and collects
    the weight of every rule in ``_LANGUAGE_HINTS`` whose patterns all occur
    in *code*. The highest total wins; on a tie the language that comes first
    in ``LANGUAGE_CONFIGS`` is chosen.

    Args:
        code: Source text to classify.

    Returns:
        The best-scoring language key, or ``"python"`` when no rule matched.
    """
    tally = dict.fromkeys(LANGUAGE_CONFIGS, 0)
    for weight, languages, patterns in _LANGUAGE_HINTS:
        if all(rx.search(code) for rx in patterns):
            for name in languages:
                tally[name] += weight
    # max() returns the first maximal key it meets, so ties follow dict order.
    winner = max(tally, key=tally.get)
    if tally[winner] > 0:
        return winner
    return "python"