Alyosha11
/

Phoneme

Model card Files Files and versions

Phoneme / extract.py

Alyosha11's picture

Upload extract.py with huggingface_hub

c4d0a5f verified almost 2 years ago

history blame contribute delete

1.23 kB

	import os
	import pyarrow.parquet as pq

	def extract_parquet_files(directory, output_directory="extracted_csv_files"):
	    """Convert every ``.parquet`` file directly inside ``directory`` to CSV.

	    Each matching file is read with ``pyarrow.parquet.read_table``, converted
	    to a pandas DataFrame, and written without the index column to
	    ``<output_directory>/<original stem>.csv``. A progress line is printed
	    for every file converted.

	    Args:
	        directory: Path of the folder to scan. Only its immediate entries
	            are considered, and only names ending in ``.parquet``
	            (case-sensitive) are converted.
	        output_directory: Folder that receives the CSV files. It is created
	            if it does not exist. Defaults to ``"extracted_csv_files"``,
	            resolved relative to the current working directory.

	    Returns:
	        A list with the path of each CSV file written, in processing order.
	        The list is empty when ``directory`` holds no ``.parquet`` entries.

	    Nothing is caught here: errors raised while listing the directory,
	    reading a parquet file, or writing a CSV propagate to the caller.
	    """
	    # Created up front, before the source directory is even listed.
	    os.makedirs(output_directory, exist_ok=True)

	    written_paths = []

	    # os.listdir returns entries in arbitrary order; sorting keeps the
	    # processing order and the printed log reproducible across runs.
	    for filename in sorted(os.listdir(directory)):
	        if not filename.endswith(".parquet"):
	            continue

	        # Load the parquet file and materialise it as a pandas DataFrame.
	        frame = pq.read_table(os.path.join(directory, filename)).to_pandas()

	        # Same base name as the source file, with a .csv extension.
	        csv_filename = os.path.splitext(filename)[0] + ".csv"
	        csv_file_path = os.path.join(output_directory, csv_filename)

	        frame.to_csv(csv_file_path, index=False)
	        written_paths.append(csv_file_path)

	        print(f"Extracted data from {filename} saved as {csv_filename}")

	    return written_paths

	# Folder that holds the source .parquet files
	parquet_directory = "hindi"

	# Convert each parquet file found in that folder to CSV
	extract_parquet_files(directory=parquet_directory)