Spaces:
Sleeping
Sleeping
Fix yara preprocessing from overwriting
Browse files
app.py
CHANGED
|
@@ -74,36 +74,60 @@ def preprocess_yara_rules(repo_path: Path) -> Path:
|
|
| 74 |
shutil.rmtree(processed_dir)
|
| 75 |
processed_dir.mkdir()
|
| 76 |
|
| 77 |
-
rule_pattern = re.compile(r"rule\s+(
|
| 78 |
-
seen_rules = {}
|
| 79 |
-
rule_counter = 0
|
| 80 |
|
| 81 |
for yara_file in repo_path.glob("**/*.yara"):
|
| 82 |
if yara_file.name == "misc.yara":
|
| 83 |
logger.info(f"Skipping {yara_file} as it does not belong to any malware family")
|
| 84 |
continue
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
new_content = []
|
| 86 |
-
with open(yara_file, "r", encoding="utf-8") as f:
|
| 87 |
for line in f:
|
| 88 |
-
|
|
|
|
|
|
|
| 89 |
if match:
|
| 90 |
original_name = match.group(1)
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 96 |
line = line.replace(original_name, new_name, 1)
|
|
|
|
|
|
|
| 97 |
else:
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
|
|
|
| 104 |
with open(processed_file, "w", encoding="utf-8") as f:
|
| 105 |
f.writelines(new_content)
|
| 106 |
-
|
|
|
|
| 107 |
return processed_dir
|
| 108 |
|
| 109 |
def compile_yara_rules(repo_path: Path) -> Optional[yara.Rules]:
|
|
|
|
| 74 |
shutil.rmtree(processed_dir)
|
| 75 |
processed_dir.mkdir()
|
| 76 |
|
| 77 |
+
rule_pattern = re.compile(r"rule\s+([^\s{]+)")
|
| 78 |
+
seen_rules = {}
|
| 79 |
+
rule_counter = 0
|
| 80 |
|
| 81 |
for yara_file in repo_path.glob("**/*.yara"):
|
| 82 |
if yara_file.name == "misc.yara":
|
| 83 |
logger.info(f"Skipping {yara_file} as it does not belong to any malware family")
|
| 84 |
continue
|
| 85 |
+
|
| 86 |
+
# Preserve directory structure
|
| 87 |
+
try:
|
| 88 |
+
relative_path = yara_file.relative_to(repo_path)
|
| 89 |
+
except ValueError:
|
| 90 |
+
continue # Skip files not in repo path
|
| 91 |
+
|
| 92 |
+
processed_file = processed_dir / relative_path
|
| 93 |
+
processed_file.parent.mkdir(parents=True, exist_ok=True)
|
| 94 |
+
|
| 95 |
new_content = []
|
| 96 |
+
with open(yara_file, "r", encoding="utf-8", errors='replace') as f:
|
| 97 |
for line in f:
|
| 98 |
+
line = line.rstrip('\n')
|
| 99 |
+
match = rule_pattern.match(line.strip())
|
| 100 |
+
|
| 101 |
if match:
|
| 102 |
original_name = match.group(1)
|
| 103 |
+
# Sanitize rule name
|
| 104 |
+
clean_name = re.sub(r'[^a-zA-Z0-9_]', '_', original_name)
|
| 105 |
+
|
| 106 |
+
# Ensure valid starting character
|
| 107 |
+
if not clean_name:
|
| 108 |
+
clean_name = "invalid_rule"
|
| 109 |
+
elif not clean_name[0].isalpha() and clean_name[0] != '_':
|
| 110 |
+
clean_name = f"_{clean_name}"
|
| 111 |
+
|
| 112 |
+
# Handle duplicates
|
| 113 |
+
if clean_name in seen_rules:
|
| 114 |
+
seen_rules[clean_name] += 1
|
| 115 |
+
new_name = f"{clean_name}_{seen_rules[clean_name]}"
|
| 116 |
line = line.replace(original_name, new_name, 1)
|
| 117 |
+
rule_counter += 1
|
| 118 |
+
logger.debug(f"Renamed duplicate rule: {original_name} -> {new_name}")
|
| 119 |
else:
|
| 120 |
+
seen_rules[clean_name] = 0 # Initialize count
|
| 121 |
+
if clean_name != original_name:
|
| 122 |
+
line = line.replace(original_name, clean_name, 1)
|
| 123 |
+
logger.debug(f"Sanitized rule name: {original_name} -> {clean_name}")
|
| 124 |
+
|
| 125 |
+
new_content.append(line + '\n')
|
| 126 |
+
|
| 127 |
with open(processed_file, "w", encoding="utf-8") as f:
|
| 128 |
f.writelines(new_content)
|
| 129 |
+
|
| 130 |
+
logger.info(f"Processed {rule_counter} duplicate rules")
|
| 131 |
return processed_dir
|
| 132 |
|
| 133 |
def compile_yara_rules(repo_path: Path) -> Optional[yara.Rules]:
|