Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pandas as pd | |
| import google.generativeai as genai | |
| import matplotlib.pyplot as plt | |
| import seaborn as sns | |
| import plotly.express as px | |
import os

# Page layout must be configured before any other Streamlit call.
st.set_page_config(page_title="AI CSV Data Analyst", layout="wide")

# The Gemini API key is read from the environment instead of being embedded in
# the source, so it never ends up in a public repository or page scrape.
# NOTE(review): the key that used to be hard-coded here must be treated as
# leaked and rotated.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
if GEMINI_API_KEY:
    genai.configure(api_key=GEMINI_API_KEY)
else:
    # Data exploration does not need the key, so only warn instead of halting.
    st.warning(
        "GEMINI_API_KEY is not set; the chat panel will not be able to reach Gemini."
    )

# Create two columns (70:30 split): data exploration on the left, chat on the right.
left_col, right_col = st.columns([7, 3])
# Left panel: upload a CSV/Excel file, show it, and render summary
# statistics and charts for the columns that hold numeric data.
with left_col:
    st.title("📊 AI-Powered CSV Data Analyst")

    # File Upload
    uploaded_file = st.file_uploader("Upload a CSV or Excel file", type=["csv", "xlsx"])

    if uploaded_file is not None:
        # Compare the extension case-insensitively so "DATA.CSV" is accepted too.
        file_ext = uploaded_file.name.rsplit(".", 1)[-1].lower()

        # Everything is read as text (dtype=str); numeric columns are detected
        # further down by attempting a conversion.
        try:
            if file_ext == "csv":
                df = pd.read_csv(uploaded_file, dtype=str)
            elif file_ext == "xlsx":
                df = pd.read_excel(uploaded_file, engine="openpyxl", dtype=str)
            else:
                st.error(f"Unsupported file type: .{file_ext}")
                # Halt this run so nothing below touches an undefined `df`.
                st.stop()
        except Exception as e:  # UI boundary: report the problem instead of crashing
            st.error(f"Could not read the uploaded file: {e}")
            st.stop()

        # Not referenced in the visible part of this script; kept in case code
        # outside this view relies on `np`.
        import numpy as np

        # Display the DataFrame
        st.subheader("📂 Uploaded Data")
        st.dataframe(df)

        # Data Insights
        st.subheader("📈 Data Insights")

        # Dataset Summary
        st.write(f"**Rows:** {df.shape[0]}, **Columns:** {df.shape[1]}")
        st.write(f"**Missing Values:**")
        st.write(df.isnull().sum())

        # Because the frame was read as text, `select_dtypes("number")` on `df`
        # would never match anything. Coerce every column instead and keep only
        # those where at least one value parsed as a number; columns that come
        # back entirely NaN held no numeric data.
        numeric_df = df.apply(pd.to_numeric, errors="coerce").dropna(axis=1, how="all")
        numeric_columns = list(numeric_df.columns)

        # Basic Statistics
        st.subheader("📊 Statistical Summary")
        if numeric_columns:
            st.write(numeric_df.describe())
        else:
            st.info("No hay columnas numéricas para mostrar estadísticas.")

        # Visualizations
        st.subheader("📉 Data Visualizations")

        # Histogram of a user-selected numeric column
        if numeric_columns:
            col = st.selectbox("Select a column for histogram:", numeric_columns)
            fig = px.histogram(numeric_df, x=col, title=f"Histogram of {col}")
            st.plotly_chart(fig)

        # Correlation Heatmap (needs at least two numeric columns)
        if len(numeric_columns) > 1:
            st.subheader("🔍 Correlation Heatmap")
            fig, ax = plt.subplots(figsize=(6, 4))
            sns.heatmap(numeric_df.corr(), annot=True, cmap="coolwarm", ax=ax)
            st.pyplot(fig)
# Right panel: free-text question answered by Gemini from a compact numeric
# summary of the uploaded dataset.
with right_col:
    st.subheader("💬 Chat with Your Data")
    user_query = st.text_input("Ask a question about the data...")

    if user_query and uploaded_file is None:
        # Without a file there is no dataset to summarise; tell the user why
        # nothing is happening instead of silently ignoring the question.
        st.info("Upload a file first so the question can be answered from its data.")
    elif user_query:
        # The data was loaded as text, so coerce it and keep only columns where
        # at least one value parsed as a number. Text columns become all-NaN
        # after coercion and would otherwise pollute the "numeric" summary.
        coerced = df.apply(pd.to_numeric, errors="coerce")
        numeric_only = coerced.dropna(axis=1, how="all")

        if numeric_only.shape[1] > 0:
            # Limited to 5 columns and rounded to keep the prompt small.
            safe_summary = numeric_only.describe().round(2).iloc[:, :5]
            summary_text = safe_summary.to_string()
        else:
            # describe() raises on a frame without columns, so use a placeholder.
            summary_text = "(no numeric columns found in the dataset)"

        prompt = (
            "\n"
            "You are a data analyst. The user has uploaded a dataset.\n"
            "Answer the query based on the dataset provided.\n"
            "Dataset Overview (first 5 numeric columns):\n"
            f"{summary_text}\n"
            "User Question:\n"
            f"{user_query}\n"
        )

        try:
            # NOTE(review): confirm this model id is still served by the API.
            model = genai.GenerativeModel("models/gemini-pro")
            response = model.generate_content(prompt)
            if hasattr(response, "text"):
                st.write("🤖 AI Response:")
                st.write(response.text)
            else:
                st.error("No se recibió una respuesta válida del modelo.")
        except Exception as e:  # UI boundary: surface any API failure to the user
            st.error(f"Error al consultar el modelo Gemini: {e}")