Spaces:
Sleeping
Sleeping
Fix duplicates
Browse files
app.py
CHANGED
|
@@ -67,46 +67,42 @@ async def clone_yara_repo():
|
|
| 67 |
return None
|
| 68 |
|
| 69 |
def preprocess_yara_rules(repo_path: Path) -> Path:
|
| 70 |
-
"""Preprocess YARA rules to fix syntax issues"""
|
| 71 |
processed_dir = Path("processed_yara_rules")
|
| 72 |
if processed_dir.exists():
|
| 73 |
shutil.rmtree(processed_dir)
|
| 74 |
processed_dir.mkdir()
|
| 75 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
for yara_file in repo_path.glob("**/*.yar"):
|
| 77 |
try:
|
| 78 |
content = yara_file.read_text(encoding='utf-8', errors='replace')
|
| 79 |
-
|
| 80 |
-
# Fix invalid rule names and string identifiers
|
| 81 |
processed_content = []
|
| 82 |
-
rule_counter = 1
|
| 83 |
-
string_counter = 1
|
| 84 |
current_rule = None
|
| 85 |
-
|
| 86 |
for line in content.split('\n'):
|
| 87 |
-
# Normalize rule names
|
| 88 |
if line.strip().startswith('rule '):
|
| 89 |
rule_name = line.split('{')[0].split('rule ')[1].strip()
|
| 90 |
-
# Replace invalid characters and ensure unique names
|
| 91 |
clean_name = rule_name.replace('#', '').replace(' ', '_').replace('-', '_')
|
| 92 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
processed_content.append(f"rule {clean_name} {{")
|
| 94 |
current_rule = clean_name
|
| 95 |
-
rule_counter += 1
|
| 96 |
-
string_counter = 1
|
| 97 |
-
# Make string identifiers unique per rule
|
| 98 |
-
elif '$a_01_' in line and current_rule:
|
| 99 |
-
new_id = f"${current_rule}_str_{string_counter}"
|
| 100 |
-
line = line.replace('$a_01_', new_id, 1)
|
| 101 |
-
string_counter += 1
|
| 102 |
-
processed_content.append(line)
|
| 103 |
else:
|
| 104 |
processed_content.append(line)
|
| 105 |
|
| 106 |
# Save processed file
|
| 107 |
processed_file = processed_dir / yara_file.name
|
| 108 |
processed_file.write_text('\n'.join(processed_content))
|
| 109 |
-
|
| 110 |
except Exception as e:
|
| 111 |
logger.warning(f"Error processing {yara_file}: {str(e)}")
|
| 112 |
continue
|
|
@@ -116,8 +112,8 @@ def preprocess_yara_rules(repo_path: Path) -> Path:
|
|
| 116 |
def compile_yara_rules(repo_path: Path) -> Optional[yara.Rules]:
|
| 117 |
"""Compile YARA rules from repository with error handling"""
|
| 118 |
try:
|
| 119 |
-
|
| 120 |
-
yara_files = list(
|
| 121 |
|
| 122 |
if not yara_files:
|
| 123 |
logger.warning("No YARA files found in repository")
|
|
@@ -126,9 +122,9 @@ def compile_yara_rules(repo_path: Path) -> Optional[yara.Rules]:
|
|
| 126 |
logger.info(f"Found {len(yara_files)} YARA files, compiling rules")
|
| 127 |
rules = {}
|
| 128 |
|
| 129 |
-
for yara_file in yara_files:
|
| 130 |
try:
|
| 131 |
-
rules[str(yara_file)] = str(yara_file)
|
| 132 |
except Exception as e:
|
| 133 |
logger.warning(f"Error processing {yara_file}: {str(e)}")
|
| 134 |
|
|
|
|
| 67 |
return None
|
| 68 |
|
| 69 |
def preprocess_yara_rules(repo_path: Path) -> Path:
|
| 70 |
+
"""Preprocess YARA rules to fix syntax issues and ensure unique rule names"""
|
| 71 |
processed_dir = Path("processed_yara_rules")
|
| 72 |
if processed_dir.exists():
|
| 73 |
shutil.rmtree(processed_dir)
|
| 74 |
processed_dir.mkdir()
|
| 75 |
|
| 76 |
+
rule_pattern = re.compile(r"rule\s+(\w+)") # Match YARA rule names
|
| 77 |
+
seen_rules = set() # Track rule names to prevent duplicates
|
| 78 |
+
rule_counter = 1 # Counter for renaming duplicate rules
|
| 79 |
+
|
| 80 |
for yara_file in repo_path.glob("**/*.yar"):
|
| 81 |
try:
|
| 82 |
content = yara_file.read_text(encoding='utf-8', errors='replace')
|
|
|
|
|
|
|
| 83 |
processed_content = []
|
|
|
|
|
|
|
| 84 |
current_rule = None
|
| 85 |
+
|
| 86 |
for line in content.split('\n'):
|
|
|
|
| 87 |
if line.strip().startswith('rule '):
|
| 88 |
rule_name = line.split('{')[0].split('rule ')[1].strip()
|
|
|
|
| 89 |
clean_name = rule_name.replace('#', '').replace(' ', '_').replace('-', '_')
|
| 90 |
+
|
| 91 |
+
# If the rule name is already seen, rename it
|
| 92 |
+
if clean_name in seen_rules:
|
| 93 |
+
clean_name = f"{clean_name}_{rule_counter}"
|
| 94 |
+
rule_counter += 1
|
| 95 |
+
|
| 96 |
+
seen_rules.add(clean_name)
|
| 97 |
processed_content.append(f"rule {clean_name} {{")
|
| 98 |
current_rule = clean_name
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 99 |
else:
|
| 100 |
processed_content.append(line)
|
| 101 |
|
| 102 |
# Save processed file
|
| 103 |
processed_file = processed_dir / yara_file.name
|
| 104 |
processed_file.write_text('\n'.join(processed_content))
|
| 105 |
+
|
| 106 |
except Exception as e:
|
| 107 |
logger.warning(f"Error processing {yara_file}: {str(e)}")
|
| 108 |
continue
|
|
|
|
| 112 |
def compile_yara_rules(repo_path: Path) -> Optional[yara.Rules]:
|
| 113 |
"""Compile YARA rules from repository with error handling"""
|
| 114 |
try:
|
| 115 |
+
processed_dir = preprocess_yara_rules(repo_path)
|
| 116 |
+
yara_files = list(processed_dir.glob("**/*.yara"))
|
| 117 |
|
| 118 |
if not yara_files:
|
| 119 |
logger.warning("No YARA files found in repository")
|
|
|
|
| 122 |
logger.info(f"Found {len(yara_files)} YARA files, compiling rules")
|
| 123 |
rules = {}
|
| 124 |
|
| 125 |
+
for i, yara_file in enumerate(yara_files):
|
| 126 |
try:
|
| 127 |
+
rules[str(f"{yara_file}_{i}")] = str(yara_file)
|
| 128 |
except Exception as e:
|
| 129 |
logger.warning(f"Error processing {yara_file}: {str(e)}")
|
| 130 |
|