Spaces:
Sleeping
Sleeping
File size: 1,430 Bytes
5d6df7a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 |
import pandas as pd
import os
import glob
def remove_embeddings_column(file_path) -> None:
    """Remove the ``embedding`` column from a tab-separated file, in place.

    The file is parsed as tab-separated text, the column named ``embedding``
    is dropped if present, and the result is written back to the same path.
    A file without that column is not rewritten.

    Every cell is loaded as a plain string with NA detection disabled so that
    the remaining columns survive the read/write round trip unchanged. With
    pandas' default type inference, values such as ``NA``, ``007`` or ``1.50``
    would be written back as an empty field, ``7`` and ``1.5``.

    Args:
        file_path: Path of the tab-separated file to modify.

    Returns:
        None. The outcome is reported on standard output; any exception
        raised while reading or writing is caught and printed instead of
        being propagated.
    """
    try:
        # dtype=str + keep_default_na=False: treat the file as opaque text so
        # dropping one column cannot alter the values of the others.
        df = pd.read_csv(file_path, sep='\t', dtype=str, keep_default_na=False)

        if 'embedding' not in df.columns:
            print(f"No embedding column found in {file_path}")
            return

        # Overwrite the original file with the remaining columns.
        df.drop(columns='embedding').to_csv(file_path, sep='\t', index=False)
        print(f"Successfully removed embedding column from {file_path}")
    except Exception as e:
        # Deliberately broad: a failure is reported and swallowed so that a
        # caller looping over many files can carry on with the next one.
        print(f"Error processing {file_path}: {str(e)}")
def process_all_layers(base_path="src/codebert/language_classification"):
    """Strip the embedding column from every per-layer explanation_words CSV.

    Files are located with the glob ``layer*/explanation_words_layer*.csv``
    relative to ``base_path`` and each match is handed to
    ``remove_embeddings_column`` in the order the glob returns them.

    Args:
        base_path: Directory that contains the ``layer*`` sub-directories.

    Returns:
        None. Progress is reported on standard output; when nothing matches,
        a message naming the pattern is printed and no file is touched.
    """
    search_glob = os.path.join(base_path, "layer*/explanation_words_layer*.csv")
    matched_paths = glob.glob(search_glob)

    if matched_paths:
        print(f"Found {len(matched_paths)} files to process")
        for csv_path in matched_paths:
            remove_embeddings_column(csv_path)
    else:
        print(f"No explanation_words CSV files found matching pattern: {search_glob}")
if __name__ == "__main__":
    # Script entry point: process the default base path, then report
    # completion on standard output.
    process_all_layers()
    print("Processing complete!")