File size: 1,430 Bytes
5d6df7a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import pandas as pd
import os
import glob

def remove_embeddings_column(file_path):
    """Remove the ``embedding`` column from a tab-separated file, in place.

    The file is parsed as tab-separated text. If it has a column named
    ``embedding``, that column is dropped and the file is overwritten with
    the remaining columns; otherwise the file is left untouched. Any error
    is reported on stdout instead of being raised.

    Args:
        file_path: Path of the tab-separated file to rewrite.
    """
    try:
        # Read every cell as literal text. With pandas' defaults, tokens such
        # as "null", "NA" or "nan" are parsed as missing values and would be
        # written back as empty cells, and numeric cells would be reformatted
        # (e.g. "1.50" -> "1.5"), silently altering the columns we keep.
        df = pd.read_csv(file_path, sep='\t', dtype=str, keep_default_na=False)

        if 'embedding' not in df.columns:
            print(f"No embedding column found in {file_path}")
            return

        df = df.drop(columns='embedding')

        # Overwrite the original file with the remaining columns.
        df.to_csv(file_path, sep='\t', index=False)
        print(f"Successfully removed embedding column from {file_path}")

    except Exception as e:
        # Deliberately broad: the failure is reported and swallowed so the
        # caller can carry on with other files.
        print(f"Error processing {file_path}: {str(e)}")

def process_all_layers(base_path="src/codebert/language_classification"):
    """Strip the embedding column from every per-layer explanation_words file.

    Globs ``layer*/explanation_words_layer*.csv`` beneath ``base_path`` and
    hands each match to ``remove_embeddings_column``. When nothing matches,
    a notice is printed and no file is touched.
    """
    search_glob = os.path.join(base_path, "layer*/explanation_words_layer*.csv")
    matches = glob.glob(search_glob)

    if matches:
        print(f"Found {len(matches)} files to process")

        # Rewrite the matched files one at a time.
        for match in matches:
            remove_embeddings_column(match)
    else:
        print(f"No explanation_words CSV files found matching pattern: {search_glob}")

# Script entry point: runs only when this file is executed directly, not when
# it is imported as a module.
if __name__ == "__main__":
    # Uses the default base_path declared on process_all_layers.
    process_all_layers()
    # Printed once process_all_layers returns, including when no files matched.
    print("Processing complete!")