Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| from pyspark.sql import SparkSession | |
| from huggingface_hub import HfApi, hf_hub_download | |
| import requests | |
# Create (or reuse) the Spark session shared by the whole app.
# getOrCreate() hands back an already-running session when one exists.
spark = (
    SparkSession.builder
    .appName("HuggingFaceSpark")
    .getOrCreate()
)
def read_parquet_from_hf(repo_id, filename, repo_type="dataset"):
    """Download a Parquet file from the Hugging Face Hub and load it with Spark.

    Args:
        repo_id: Hub repository identifier, e.g. ``"user/dataset_name"``.
        filename: Path of the Parquet file inside the repository.
        repo_type: Kind of Hub repository to look in. Defaults to
            ``"dataset"`` because this app loads dataset repos;
            ``hf_hub_download`` itself would otherwise look for a *model*
            repo and fail to find the file. Pass ``"model"`` or ``"space"``
            to read from another repository type.

    Returns:
        The Spark DataFrame produced by ``spark.read.parquet`` for the
        downloaded file.
    """
    # hf_hub_download returns the local path of the downloaded file.
    local_path = hf_hub_download(
        repo_id=repo_id,
        filename=filename,
        repo_type=repo_type,
    )
    return spark.read.parquet(local_path)
def load_and_display_dataset():
    """Render the Streamlit page that loads a Parquet file and previews it.

    Asks for a Hub repo ID and a file name. When the "Load Dataset" button
    is pressed, the file is loaded through ``read_parquet_from_hf`` and the
    page shows the column schema followed by the first 10 rows. Any failure
    while loading or collecting the preview is reported with ``st.error``
    instead of propagating.
    """
    st.title("Hugging Face Spark Integration")
    repo_id = st.text_input("Enter Hugging Face Dataset Repo ID (e.g., 'james-burton/wine_reviews'):")
    filename = st.text_input("Enter Dataset File Name (e.g., 'train/0.parquet'):")

    if not st.button("Load Dataset"):
        return

    # Pasted values often carry stray whitespace that would break the lookup.
    repo_id = repo_id.strip()
    filename = filename.strip()
    if not (repo_id and filename):
        st.error("Please enter both the repo ID and filename.")
        return

    try:
        df = read_parquet_from_hf(repo_id, filename)
        # DataFrame.printSchema() and DataFrame.show() print to stdout and
        # return None, so passing their results to st.write would render
        # nothing useful. Build the schema text and a small pandas preview
        # explicitly instead.
        schema_lines = ["root"]
        for field in df.schema.fields:
            nullable = str(field.nullable).lower()
            schema_lines.append(
                f" |-- {field.name}: {field.dataType.simpleString()} (nullable = {nullable})"
            )
        preview = df.limit(10).toPandas()
    except Exception as e:  # UI boundary: surface any failure to the user.
        st.error(f"Error loading dataset: {e}")
        return

    st.write("Dataset Schema:")
    st.text("\n".join(schema_lines))
    st.write("First 10 rows of the dataset:")
    st.write(preview)
# Script entry point: build the page when this file is executed as a script.
if __name__ == "__main__":
    load_and_display_dataset()