Spaces:
Paused
Paused
| import streamlit as st | |
| import pandas as pd | |
| import matplotlib.pyplot as plt | |
| from pandasai import SmartDataframe | |
| from pandasai.llm import OpenAI | |
| from pandasai.callbacks import BaseCallback | |
| from pandasai.responses.response_parser import ResponseParser | |
| import os | |
# Read the OpenAI API token from the environment only. A key must never be
# embedded in source as a fallback default: anything committed alongside the
# code is effectively public and has to be treated as compromised.
OPENAI_API_TOKEN = os.getenv("BIT_OPENAI_API_KEY")
if not OPENAI_API_TOKEN:
    # Fail fast with an actionable message instead of starting the app with
    # no usable credential.
    raise RuntimeError(
        "BIT_OPENAI_API_KEY is not set. Provide the OpenAI API key through "
        "that environment variable (or a deployment secret) before "
        "starting the app."
    )

# Shared LLM client (model "gpt-4o-mini") used when building SmartDataframes.
llm = OpenAI(api_token=OPENAI_API_TOKEN, model_name="gpt-4o-mini")
class StreamlitCallback(BaseCallback):
    """Callback that displays the text passed to ``on_code`` in a container."""

    def __init__(self, container) -> None:
        """Store the container object that ``on_code`` will write into."""
        self.container = container

    def on_code(self, response: str):
        """Show ``response`` as a code block via the stored container."""
        target = self.container
        target.code(response)
class StreamlitResponse(ResponseParser):
    """Response parser that draws each result straight onto the Streamlit page.

    Every formatter renders ``result["value"]`` with a Streamlit call and
    returns ``None``.
    """

    def __init__(self, context) -> None:
        """Forward ``context`` unchanged to the base parser."""
        super().__init__(context)

    def format_dataframe(self, result):
        """Render ``result["value"]`` with ``st.dataframe``."""
        value = result["value"]
        st.dataframe(value)
        return None

    def format_plot(self, result):
        """Render ``result["value"]`` with ``st.image``."""
        value = result["value"]
        st.image(value)
        return None

    def format_other(self, result):
        """Render ``result["value"]`` with ``st.write``."""
        value = result["value"]
        st.write(value)
        return None
def process_file(file):
    """Load an uploaded CSV or Excel file and wrap it in a SmartDataframe.

    Args:
        file: Uploaded file object exposing ``name``, ``readline()`` and
            ``seek()`` (``main`` passes the value returned by
            ``st.file_uploader``).

    Returns:
        A ``(df, sdf)`` tuple holding the pandas DataFrame and the
        ``SmartDataframe`` built on it, or ``(None, None)`` after the problem
        has been reported with ``st.error`` (unsupported extension, missing
        Excel engine, or a file that cannot be read).
    """
    # Local import: only needed to recognise a corrupt .xlsx (zip) container.
    import zipfile

    file_extension = file.name.split(".")[-1].lower()

    try:
        if file_extension == "csv":
            # Pick the separator from the header line: a semicolon anywhere
            # in it selects ";", otherwise "," is used.
            first_line = file.readline().decode("utf-8")
            file.seek(0)  # Rewind so pandas parses from the start.
            delimiter = ";" if ";" in first_line else ","
            df = pd.read_csv(file, delimiter=delimiter)
        elif file_extension == "xlsx":
            df = pd.read_excel(file, engine="openpyxl")
        elif file_extension == "xls":
            # openpyxl cannot read the legacy .xls format, so let pandas
            # select the engine from the file contents instead of forcing it.
            df = pd.read_excel(file)
        else:
            st.error("Unsupported file format. Please upload a CSV or Excel file.")
            return None, None
    except ImportError as e:
        # pandas raises ImportError itself when the required engine is absent.
        st.error(
            f"Error: {e}. Please install 'openpyxl' (.xlsx) or 'xlrd' (.xls) "
            "for Excel support."
        )
        return None, None
    except (ValueError, OSError, zipfile.BadZipFile) as e:
        # ValueError also covers UnicodeDecodeError and pandas'
        # EmptyDataError / ParserError, so empty, malformed or non-UTF-8
        # uploads are reported to the user instead of crashing the app.
        st.error(f"Could not read '{file.name}': {e}")
        return None, None

    sdf = SmartDataframe(
        df,
        config={
            "llm": llm,
            "save_logs": True,
            "verbose": False,
            "response_parser": StreamlitResponse,
        },
    )
    return df, sdf
def main():
    """Render the app: file upload, data preview, and a free-text query box.

    Returns early (rendering nothing further) when no file has been uploaded,
    when ``process_file`` could not load the upload, or when the query box is
    empty.
    """
    st.title("AI-Powered Dataframe Analysis with OpenAI")
    uploaded_file = st.file_uploader(
        "Upload CSV or Excel File", type=["csv", "xls", "xlsx"]
    )
    if uploaded_file is None:
        return

    df, sdf = process_file(uploaded_file)
    if df is None:
        # process_file has already reported the problem via st.error.
        return

    st.subheader("Data Preview")
    with st.expander("View Data Summary"):
        st.write("### Data Overview")
        st.dataframe(df.describe(include="all"))
        st.write("### First 10 Rows of Data")
        st.dataframe(df.head(10))
        st.write("### Column Details")
        for col in df.columns:
            st.write(f"**{col}**")
            st.dataframe(df[[col]].head(10))

    query = st.text_area("Enter Your Query")
    if not query or not query.strip():
        # Nothing (or only whitespace) entered: skip the LLM call.
        return

    answer = sdf.chat(query)
    # The StreamlitResponse formatters render results themselves and return
    # None, so a non-None value here was produced by chat() directly
    # (presumably a failure message - confirm against the pandasai version in
    # use). Show it rather than silently dropping it.
    if answer is not None:
        st.write(answer)
# Script entry point: build the page only when this file is executed directly.
if __name__ == "__main__":
    main()