Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import os | |
| import glob | |
def remove_embeddings_column(file_path):
    """Remove the 'embedding' column from a tab-separated file, in place.

    The file is parsed as tab-separated text. If it has a column named
    ``embedding``, that column is dropped and the file is overwritten;
    otherwise the file is left untouched.

    Every cell is loaded as a plain string with NA detection disabled so
    the remaining columns are written back exactly as they were read.
    With pandas' default parsing, tokens such as "null", "NA" or "nan"
    would be treated as missing values and saved as empty cells, and an
    integer column containing blanks would be rewritten as floats.

    Args:
        file_path: Path of the tab-separated file to modify.

    Returns:
        None. The outcome is reported on stdout. Any exception raised
        while reading or writing is caught and printed, not propagated.
    """
    try:
        # dtype=str + keep_default_na=False: no type inference and no
        # NA conversion, so untouched columns survive the round trip.
        df = pd.read_csv(
            file_path,
            sep='\t',
            dtype=str,
            keep_default_na=False,
        )

        if 'embedding' not in df.columns:
            print(f"No embedding column found in {file_path}")
            return

        # Drop the column and overwrite the original file.
        df = df.drop(columns='embedding')
        df.to_csv(file_path, sep='\t', index=False)
        print(f"Successfully removed embedding column from {file_path}")
    except Exception as e:
        # Deliberately broad: one unreadable file is reported and
        # skipped so a batch run over many files can continue.
        print(f"Error processing {file_path}: {str(e)}")
def process_all_layers(base_path="src/codebert/language_classification"):
    """Strip the embedding column from every per-layer explanation_words file.

    Globs ``layer*/explanation_words_layer*.csv`` under ``base_path`` and
    passes each match to ``remove_embeddings_column``. If nothing matches,
    a message is printed and the function returns without doing any work.

    Args:
        base_path: Directory that contains the ``layer*`` sub-directories.

    Returns:
        None. Progress is reported on stdout.
    """
    search_glob = os.path.join(base_path, "layer*/explanation_words_layer*.csv")
    matches = glob.glob(search_glob)

    # Guard clause: nothing to do when the pattern matches no files.
    if len(matches) == 0:
        print(f"No explanation_words CSV files found matching pattern: {search_glob}")
        return

    print(f"Found {len(matches)} files to process")

    # Each file is handled independently, in the order glob returned it.
    for csv_path in matches:
        remove_embeddings_column(csv_path)
if __name__ == "__main__":
    # Script entry point: process the default base path, then print a
    # completion message once every matched file has been attempted.
    process_all_layers()
    print("Processing complete!")