Spaces:
Runtime error
Runtime error
| import pandas as pd | |
| import ast | |
| import javalang | |
| import os | |
class PythonASTAnalyzer(ast.NodeVisitor):
    """Collect simple structural counters while walking a Python AST.

    After ``visit(tree)`` the instance exposes:

    * ``node_count`` -- number of AST nodes visited.
    * ``max_depth`` -- deepest nesting level reached (the root node is 1).
    * ``num_functions`` -- ``def`` and ``async def`` definitions.
    * ``num_loops`` -- ``for``, ``async for`` and ``while`` statements.
    * ``num_conditionals`` -- ``if`` statements.
    """

    def __init__(self):
        self.node_count = 0
        self.max_depth = 0
        # Nesting level of the node currently being visited.
        self.current_depth = 0
        self.num_functions = 0
        self.num_loops = 0
        self.num_conditionals = 0

    def generic_visit(self, node):
        """Count *node*, track depth, then descend into its children."""
        self.node_count += 1
        self.current_depth += 1
        self.max_depth = max(self.max_depth, self.current_depth)
        super().generic_visit(node)
        self.current_depth -= 1

    # Every specialised visitor delegates to generic_visit so the node is
    # still counted and its children are still traversed.

    def visit_FunctionDef(self, node):
        self.num_functions += 1
        self.generic_visit(node)

    def visit_AsyncFunctionDef(self, node):
        # ``async def`` is a distinct node type from ``def``; without this
        # visitor coroutine functions would not be counted as functions.
        self.num_functions += 1
        self.generic_visit(node)

    def visit_For(self, node):
        self.num_loops += 1
        self.generic_visit(node)

    def visit_AsyncFor(self, node):
        # ``async for`` is a distinct node type from ``for``.
        self.num_loops += 1
        self.generic_visit(node)

    def visit_While(self, node):
        self.num_loops += 1
        self.generic_visit(node)

    def visit_If(self, node):
        self.num_conditionals += 1
        self.generic_visit(node)
def extract_python_ast_features(code):
    """Return AST-derived counters for a piece of Python source.

    The source is parsed with ``ast.parse`` and walked with
    ``PythonASTAnalyzer``. The result is a dict with the keys
    ``ast_node_count``, ``ast_max_depth``, ``num_functions``, ``num_loops``
    and ``num_conditionals``.

    Any exception raised while parsing or walking (for example a syntax
    error in *code*) is swallowed and ``None`` is returned instead, so the
    caller can substitute a fallback.
    """
    # Output key -> analyzer attribute holding its value.
    key_to_attr = {
        "ast_node_count": "node_count",
        "ast_max_depth": "max_depth",
        "num_functions": "num_functions",
        "num_loops": "num_loops",
        "num_conditionals": "num_conditionals",
    }
    try:
        visitor = PythonASTAnalyzer()
        visitor.visit(ast.parse(code))
        return {key: getattr(visitor, attr) for key, attr in key_to_attr.items()}
    except Exception:
        return None
def extract_java_ast_features(code):
    """Return AST-derived counters for a piece of Java source.

    The source is parsed with ``javalang.parse.parse`` and every
    ``javalang.ast.Node`` reachable from the compilation unit is visited.
    The result is a dict with the keys ``ast_node_count``,
    ``ast_max_depth`` (the root node is depth 0), ``num_functions``
    (method declarations), ``num_loops`` (``for``/``while``/``do``) and
    ``num_conditionals`` (``if`` statements).

    Any exception raised while parsing or walking (for example a lexer or
    syntax error in *code*) is swallowed and ``None`` is returned instead,
    so the caller can substitute a fallback.
    """
    try:
        tree = javalang.parse.parse(code)

        node_count = 0
        max_depth = 0
        num_methods = 0
        num_loops = 0
        num_conditionals = 0

        loop_types = (
            javalang.tree.ForStatement,
            javalang.tree.WhileStatement,
            javalang.tree.DoStatement,
        )

        # Explicit stack instead of recursion, so the walk itself cannot
        # hit the interpreter's recursion limit. Visit order does not
        # matter because only counts and a maximum are collected.
        pending = [(tree, 0)]
        while pending:
            item, depth = pending.pop()

            if isinstance(item, (list, tuple)):
                # Containers are transparent: their members keep the depth
                # assigned to the container. Re-queuing them also handles
                # containers nested inside containers (e.g. an initializer
                # block stored as a list inside a class body), which a
                # single-level scan would skip.
                pending.extend((member, depth) for member in item)
                continue

            if not isinstance(item, javalang.ast.Node):
                # Strings, sets of modifiers, None, etc. are not AST nodes.
                continue

            node_count += 1
            max_depth = max(max_depth, depth)

            if isinstance(item, javalang.tree.MethodDeclaration):
                num_methods += 1
            if isinstance(item, loop_types):
                num_loops += 1
            if isinstance(item, javalang.tree.IfStatement):
                num_conditionals += 1

            pending.extend((child, depth + 1) for child in item.children)

        return {
            "ast_node_count": node_count,
            "ast_max_depth": max_depth,
            "num_functions": num_methods,
            "num_loops": num_loops,
            "num_conditionals": num_conditionals,
        }
    except Exception:
        return None
def extract_ast_features(df):
    """Build a DataFrame of AST features, one row per row of *df*.

    *df* must provide the columns ``normalized_code`` (source text) and
    ``Language``. Rows whose language is ``python`` or ``java``
    (case-insensitive, surrounding whitespace ignored) are passed to the
    matching extractor. Rows with any other language, with missing code,
    or whose code the extractor could not handle get all-zero features.

    The returned DataFrame always has the columns ``ast_node_count``,
    ``ast_max_depth``, ``num_functions``, ``num_loops`` and
    ``num_conditionals`` in that order, with a default integer index.
    """
    feature_names = (
        "ast_node_count",
        "ast_max_depth",
        "num_functions",
        "num_loops",
        "num_conditionals",
    )

    rows = []
    for code, language in zip(df["normalized_code"], df["Language"]):
        feats = None
        # A missing cell must not be stringified: str(nan) is "nan", which
        # is a valid Python expression and would yield non-zero features.
        if not pd.isna(code):
            language = str(language).strip().lower()
            if language == "python":
                feats = extract_python_ast_features(str(code))
            elif language == "java":
                feats = extract_java_ast_features(str(code))
        if feats is None:
            feats = dict.fromkeys(feature_names, 0)
        rows.append(feats)

    # Passing the columns explicitly keeps the schema stable even when
    # *df* has no rows.
    return pd.DataFrame(rows, columns=list(feature_names))
if __name__ == "__main__":
    # Script entry point: for each dataset split, read the processed CSV,
    # extract AST features, attach the label column and write the result.
    output_dir = "featureextraction/step2_ast_extraction"
    # Create the directory that is actually written to below; to_csv does
    # not create missing parent directories.
    os.makedirs(output_dir, exist_ok=True)

    for split in ["train", "val", "test"]:
        input_path = f"dataset/processed/dataset_{split}.csv"
        df = pd.read_csv(input_path)

        # Normalise the verbose label header to a plain "Label" column.
        if "Label (0- HUMAN, 1-AI)" in df.columns:
            df = df.rename(columns={"Label (0- HUMAN, 1-AI)": "Label"})

        X_ast = extract_ast_features(df)
        X_ast["Label"] = df["Label"]

        output_path = f"{output_dir}/{split}_features.csv"
        X_ast.to_csv(output_path, index=False)
        print(f"AST features extracted for {split}")