"""Streamlit app that repairs mis-encoded (mojibake) text in an uploaded CSV."""

import ftfy
import pandas as pd
import streamlit as st


def fix_complex_encoding(text):
    """Return *text* with encoding damage repaired, never dropping characters.

    The value is first passed through ``ftfy.fix_text``. The result is then
    re-interpreted as UTF-8 bytes that were wrongly decoded as Latin-1, but
    only when that re-interpretation is lossless.

    Args:
        text: A cell value. Non-string values (for example the NaN that
            pandas uses for missing cells) are returned unchanged.

    Returns:
        The repaired string, or the original value when it is not a string.
    """
    if not isinstance(text, str):
        return text

    fixed_text = ftfy.fix_text(text)

    # Strict round trip: accept the Latin-1 -> UTF-8 reinterpretation only
    # when every character survives it. Using errors="ignore" here would
    # silently delete legitimate characters (e.g. "café" -> "caf", or any
    # character outside Latin-1), so a failure keeps the ftfy result instead.
    try:
        return fixed_text.encode("latin1").decode("utf-8")
    except UnicodeError:
        return fixed_text


def process_dataframe(df):
    """Apply ``fix_complex_encoding`` to every cell of *df*.

    Args:
        df: The DataFrame to clean.

    Returns:
        A new DataFrame with each cell passed through
        ``fix_complex_encoding``.
    """
    # DataFrame.map replaces the deprecated DataFrame.applymap (pandas 2.1+).
    # Check the class, not the instance, so that a column named "map" cannot
    # be mistaken for the method on older pandas versions.
    if hasattr(pd.DataFrame, "map"):
        return df.map(fix_complex_encoding)
    return df.applymap(fix_complex_encoding)


st.title("CSV Encoding Fixer")

uploaded_file = st.file_uploader("Upload a CSV file", type="csv")

if uploaded_file is not None:
    # Read every column as text so numeric-looking cells are not coerced.
    df = pd.read_csv(uploaded_file, dtype=str)

    st.write("Original Data:")
    st.dataframe(df)

    processed_df = process_dataframe(df)

    st.write("Processed Data:")
    st.dataframe(processed_df)

    # Streamlit's download button takes the payload as bytes.
    output_csv = processed_df.to_csv(index=False).encode('utf-8')

    st.download_button(
        label="Download Processed CSV",
        data=output_csv,
        file_name='processed_file.csv',
        mime='text/csv',
    )