Spaces:

vedant2905
/

Salient_3_problems

Sleeping

Salient_3_problems / remove.py

Vedant Pungliya

Updating files

5d6df7a unverified 9 months ago

1.43 kB

	import pandas as pd
	import os
	import glob

	def remove_embeddings_column(file_path):
	    """Remove the ``embedding`` column from a tab-separated file, in place.

	    The file at ``file_path`` is read as tab-separated text. If it has a
	    column named exactly ``embedding``, that column is dropped and the
	    result is written back to the same path (tab-separated, no index
	    column). If the column is absent the file is left untouched.

	    Every remaining cell is preserved as literal text: values are read as
	    strings with NA detection switched off, so tokens such as ``null``,
	    ``NA`` or ``None`` are not turned into empty cells, and values such as
	    ``007`` or ``1.50`` are not reformatted as numbers.

	    Args:
	        file_path: Path of the tab-separated file to rewrite.

	    Returns:
	        None. The outcome (removed / not found / error) is printed.

	    Raises:
	        Nothing. Any failure is reported on stdout and swallowed so that a
	        batch run over many files can continue with the next one.
	    """
	    try:
	        # dtype=str + na_filter=False keeps every cell byte-for-byte as
	        # text; the default inference would silently rewrite other columns.
	        df = pd.read_csv(file_path, sep='\t', dtype=str, na_filter=False)

	        if 'embedding' not in df.columns:
	            print(f"No embedding column found in {file_path}")
	            return

	        # Overwrite the original file with the remaining columns.
	        df.drop(columns='embedding').to_csv(file_path, sep='\t', index=False)
	        print(f"Successfully removed embedding column from {file_path}")
	    except Exception as e:
	        # Deliberate best-effort boundary: report the problem and move on.
	        print(f"Error processing {file_path}: {e}")

	def process_all_layers(base_path="src/codebert/language_classification"):
	    """Strip the embedding column from every layer's explanation_words file.

	    Globs ``base_path`` for ``layer*/explanation_words_layer*.csv`` and
	    passes each match to ``remove_embeddings_column``. If nothing matches,
	    a message is printed and the function returns without doing anything.

	    Args:
	        base_path: Directory that contains the per-layer sub-directories.

	    Returns:
	        None. Progress is reported on stdout.
	    """
	    # The wildcards are what make this cover *all* layers: a pattern
	    # without them can only ever match one literal path. ``*`` also matches
	    # the empty string, so the plain ``layer/explanation_words_layer.csv``
	    # is still picked up.
	    # NOTE(review): assumes layer directories/files are named with a
	    # suffix after "layer" (e.g. layer0, layer_1) - confirm against the
	    # actual data layout.
	    pattern = os.path.join(base_path, "layer*/explanation_words_layer*.csv")

	    # Sort so files are processed (and logged) in a deterministic order.
	    files = sorted(glob.glob(pattern))

	    if not files:
	        print(f"No explanation_words CSV files found matching pattern: {pattern}")
	        return

	    print(f"Found {len(files)} files to process")

	    for file_path in files:
	        remove_embeddings_column(file_path)

	if __name__ == "__main__":
	    # Script entry point: clean every matching file under the default
	    # base path, then print a final confirmation line.
	    process_all_layers()
	    print("Processing complete!")