GranularFireplace committed on
Commit
2cb2d02
·
verified ·
1 Parent(s): 576ef61

Fix duplicates

Browse files
Files changed (1) hide show
  1. app.py +18 -22
app.py CHANGED
@@ -67,46 +67,42 @@ async def clone_yara_repo():
67
  return None
68
 
69
  def preprocess_yara_rules(repo_path: Path) -> Path:
70
- """Preprocess YARA rules to fix syntax issues"""
71
  processed_dir = Path("processed_yara_rules")
72
  if processed_dir.exists():
73
  shutil.rmtree(processed_dir)
74
  processed_dir.mkdir()
75
 
 
 
 
 
76
  for yara_file in repo_path.glob("**/*.yar"):
77
  try:
78
  content = yara_file.read_text(encoding='utf-8', errors='replace')
79
-
80
- # Fix invalid rule names and string identifiers
81
  processed_content = []
82
- rule_counter = 1
83
- string_counter = 1
84
  current_rule = None
85
-
86
  for line in content.split('\n'):
87
- # Normalize rule names
88
  if line.strip().startswith('rule '):
89
  rule_name = line.split('{')[0].split('rule ')[1].strip()
90
- # Replace invalid characters and ensure unique names
91
  clean_name = rule_name.replace('#', '').replace(' ', '_').replace('-', '_')
92
- clean_name = f"Rule_{rule_counter}_{clean_name}"[:128]
 
 
 
 
 
 
93
  processed_content.append(f"rule {clean_name} {{")
94
  current_rule = clean_name
95
- rule_counter += 1
96
- string_counter = 1
97
- # Make string identifiers unique per rule
98
- elif '$a_01_' in line and current_rule:
99
- new_id = f"${current_rule}_str_{string_counter}"
100
- line = line.replace('$a_01_', new_id, 1)
101
- string_counter += 1
102
- processed_content.append(line)
103
  else:
104
  processed_content.append(line)
105
 
106
  # Save processed file
107
  processed_file = processed_dir / yara_file.name
108
  processed_file.write_text('\n'.join(processed_content))
109
-
110
  except Exception as e:
111
  logger.warning(f"Error processing {yara_file}: {str(e)}")
112
  continue
@@ -116,8 +112,8 @@ def preprocess_yara_rules(repo_path: Path) -> Path:
116
  def compile_yara_rules(repo_path: Path) -> Optional[yara.Rules]:
117
  """Compile YARA rules from repository with error handling"""
118
  try:
119
- # processed_dir = preprocess_yara_rules(repo_path)
120
- yara_files = list(repo_path.glob("**/*.yara"))
121
 
122
  if not yara_files:
123
  logger.warning("No YARA files found in repository")
@@ -126,9 +122,9 @@ def compile_yara_rules(repo_path: Path) -> Optional[yara.Rules]:
126
  logger.info(f"Found {len(yara_files)} YARA files, compiling rules")
127
  rules = {}
128
 
129
- for yara_file in yara_files:
130
  try:
131
- rules[str(yara_file)] = str(yara_file)
132
  except Exception as e:
133
  logger.warning(f"Error processing {yara_file}: {str(e)}")
134
 
 
67
  return None
68
 
69
  def preprocess_yara_rules(repo_path: Path) -> Path:
70
+ """Preprocess YARA rules to fix syntax issues and ensure unique rule names"""
71
  processed_dir = Path("processed_yara_rules")
72
  if processed_dir.exists():
73
  shutil.rmtree(processed_dir)
74
  processed_dir.mkdir()
75
 
76
+ rule_pattern = re.compile(r"rule\s+(\w+)") # Match YARA rule names
77
+ seen_rules = set() # Track rule names to prevent duplicates
78
+ rule_counter = 1 # Counter for renaming duplicate rules
79
+
80
  for yara_file in repo_path.glob("**/*.yar"):
81
  try:
82
  content = yara_file.read_text(encoding='utf-8', errors='replace')
 
 
83
  processed_content = []
 
 
84
  current_rule = None
85
+
86
  for line in content.split('\n'):
 
87
  if line.strip().startswith('rule '):
88
  rule_name = line.split('{')[0].split('rule ')[1].strip()
 
89
  clean_name = rule_name.replace('#', '').replace(' ', '_').replace('-', '_')
90
+
91
+ # If the rule name is already seen, rename it
92
+ if clean_name in seen_rules:
93
+ clean_name = f"{clean_name}_{rule_counter}"
94
+ rule_counter += 1
95
+
96
+ seen_rules.add(clean_name)
97
  processed_content.append(f"rule {clean_name} {{")
98
  current_rule = clean_name
 
 
 
 
 
 
 
 
99
  else:
100
  processed_content.append(line)
101
 
102
  # Save processed file
103
  processed_file = processed_dir / yara_file.name
104
  processed_file.write_text('\n'.join(processed_content))
105
+
106
  except Exception as e:
107
  logger.warning(f"Error processing {yara_file}: {str(e)}")
108
  continue
 
112
  def compile_yara_rules(repo_path: Path) -> Optional[yara.Rules]:
113
  """Compile YARA rules from repository with error handling"""
114
  try:
115
+ processed_dir = preprocess_yara_rules(repo_path)
116
+ yara_files = list(processed_dir.glob("**/*.yara"))
117
 
118
  if not yara_files:
119
  logger.warning("No YARA files found in repository")
 
122
  logger.info(f"Found {len(yara_files)} YARA files, compiling rules")
123
  rules = {}
124
 
125
+ for i, yara_file in enumerate(yara_files):
126
  try:
127
+ rules[str(f"{yara_file}_{i}")] = str(yara_file)
128
  except Exception as e:
129
  logger.warning(f"Error processing {yara_file}: {str(e)}")
130