Spaces:

vedant2905
/

Salient_3_problems

Sleeping

File size: 1,430 Bytes

5d6df7a

import pandas as pd
import os
import glob

def remove_embeddings_column(file_path, column="embedding", sep="\t"):
    """Drop one column (``embedding`` by default) from a delimited file in place.

    Every field is read as a literal string with NA detection turned off, so
    the columns that are kept are written back unchanged. With pandas'
    default inference, tokens such as ``null``, ``NA`` or ``nan`` would be
    treated as missing and written out empty, and values like ``007`` or
    ``1.50`` would be re-rendered as ``7`` and ``1.5``.

    Args:
        file_path: Path of the file to rewrite.
        column: Name of the column to remove.
        sep: Field delimiter used for both reading and writing.

    Returns:
        True if the column was present and the file was rewritten; False if
        the column was absent or the file could not be processed. Failures
        are reported on stdout rather than raised.
    """
    try:
        # dtype=str + keep_default_na=False: round-trip the remaining
        # columns verbatim instead of letting pandas reinterpret them.
        df = pd.read_csv(file_path, sep=sep, dtype=str, keep_default_na=False)

        if column not in df.columns:
            print(f"No {column} column found in {file_path}")
            return False

        df.drop(columns=[column]).to_csv(file_path, sep=sep, index=False)
        print(f"Successfully removed {column} column from {file_path}")
        return True

    except Exception as e:
        # Deliberately broad: this is the per-file boundary of a batch job,
        # and one unreadable file must not abort the remaining files.
        print(f"Error processing {file_path}: {e}")
        return False

def process_all_layers(base_path="src/codebert/language_classification"):
    """Run remove_embeddings_column on every per-layer explanation_words file.

    Looks for files matching ``layer*/explanation_words_layer*.csv`` under
    ``base_path`` and hands each match to ``remove_embeddings_column`` in the
    order glob returns them. Prints how many files were found, or a notice
    when nothing matched. Returns None in both cases.
    """
    pattern = os.path.join(base_path, "layer*/explanation_words_layer*.csv")
    matches = glob.glob(pattern)

    if matches:
        print(f"Found {len(matches)} files to process")
        for csv_path in matches:
            remove_embeddings_column(csv_path)
    else:
        # Nothing to do; tell the user which pattern came up empty.
        print(f"No explanation_words CSV files found matching pattern: {pattern}")

# Script entry point: runs only when this file is executed directly, not when
# it is imported. Processes the default base path, then prints a final notice.
# NOTE(review): the notice is printed unconditionally, even when no files
# matched or individual files reported errors.
if __name__ == "__main__":
    process_all_layers()
    print("Processing complete!")