Spaces:

shamilcoded
/

Data-Excel

Sleeping

App Files Files Community

SHAMIL SHAHBAZ AWAN committed on Jan 1, 2025

Commit

a36f392

verified ·

1 Parent(s): 0779acb

Update app.py

Browse files

Files changed (1) hide show

app.py +21 -17

app.py CHANGED Viewed

@@ -2,8 +2,8 @@ import streamlit as st
 import pandas as pd
 import matplotlib.pyplot as plt
 import seaborn as sns
-from io import StringIO
 from transformers import pipeline
 # Load a lightweight NLP model for query understanding
 nlp = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
@@ -15,8 +15,7 @@ def load_file(uploaded_file):
         if uploaded_file.type == "text/csv":
             data = pd.read_csv(uploaded_file)
         elif uploaded_file.type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
-            # Load all sheets if it's an Excel file
-            data = pd.read_excel(uploaded_file, sheet_name=None)  # Load all sheets into a dictionary
         else:
             st.error("Unsupported file type.")
             return None
@@ -33,23 +32,19 @@ def classify_query(query, candidate_labels):
         return results['labels'][0]
     return None
-# Function to generate graph based on user query
 def generate_graph(data, query):
     """Generate a graph based on user query."""
     try:
         fig, ax = plt.subplots(figsize=(10, 6))
-        # Extract columns from data (if it's a dictionary of sheets, flatten it)
-        if isinstance(data, dict):
-            data = pd.concat(data.values(), ignore_index=True)  # Combine all sheets into a single dataframe
         # Infer column types
         numerical_columns = data.select_dtypes(include=['number']).columns.tolist()
         categorical_columns = data.select_dtypes(include=['object', 'category']).columns.tolist()
         datetime_columns = data.select_dtypes(include=['datetime']).columns.tolist()
         # Define possible graph types
-        candidate_labels = ["bar chart", "line chart", "scatter plot", "histogram", "sales question"]
         query_type = classify_query(query, candidate_labels)
         # Provide text-based query response
@@ -104,6 +99,19 @@ def generate_graph(data, query):
             else:
                 response += " Could not find relevant 'department' or 'sales' columns in the dataset."
         else:
             response = "Unsupported graph type or insufficient data. Try asking for a bar chart, line chart, scatter plot, histogram, or sales-related question."
@@ -148,18 +156,14 @@ def main():
         data = load_file(uploaded_file)
         if data is not None:
-            if isinstance(data, dict):  # For Excel with multiple sheets
-                st.write("Sheets in Excel file:", list(data.keys()))
-                sheet_name = st.selectbox("Select a sheet", list(data.keys()))
-                data = data[sheet_name]  # Use the selected sheet
             st.write("Dataset preview:", data.head())
-            # User input for graph generation or general questions
-            query = st.text_input("Enter your query (e.g., 'Generate a bar chart for countries and gross sales', or 'Which department has the most sales?')")
-            if query:
                 # Generate the graph based on the query or handle general questions
-                generate_graph(data, query)
 if __name__ == "__main__":
     main()

 import pandas as pd
 import matplotlib.pyplot as plt
 import seaborn as sns
 from transformers import pipeline
+import numpy as np
 # Load a lightweight NLP model for query understanding
 nlp = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
         if uploaded_file.type == "text/csv":
             data = pd.read_csv(uploaded_file)
         elif uploaded_file.type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
+            data = pd.read_excel(uploaded_file)
         else:
             st.error("Unsupported file type.")
             return None
         return results['labels'][0]
     return None
+# Function to generate a graph based on user query
 def generate_graph(data, query):
     """Generate a graph based on user query."""
     try:
         fig, ax = plt.subplots(figsize=(10, 6))
         # Infer column types
         numerical_columns = data.select_dtypes(include=['number']).columns.tolist()
         categorical_columns = data.select_dtypes(include=['object', 'category']).columns.tolist()
         datetime_columns = data.select_dtypes(include=['datetime']).columns.tolist()
         # Define possible graph types
+        candidate_labels = ["bar chart", "line chart", "scatter plot", "histogram", "sales question", "general question"]
         query_type = classify_query(query, candidate_labels)
         # Provide text-based query response
             else:
                 response += " Could not find relevant 'department' or 'sales' columns in the dataset."
+        elif query_type == "general question":
+            # Handle general questions
+            response = "Analyzing the data for your general question."
+            # Apply simple logic to answer the query based on dataset
+            if "sales" in query.lower():
+                response += " Checking for the highest sales..."
+                sales_column = infer_column(data, ["sales", "revenue"])
+                if sales_column:
+                    top_country = data.loc[data[sales_column].idxmax(), 'country']  # Assuming 'country' column exists
+                    response += f" The country with the highest sales is {top_country}."
+                else:
+                    response += " Could not find a 'sales' column."
         else:
             response = "Unsupported graph type or insufficient data. Try asking for a bar chart, line chart, scatter plot, histogram, or sales-related question."
         data = load_file(uploaded_file)
         if data is not None:
             st.write("Dataset preview:", data.head())
+            # User input for query
+            user_query = st.text_input("Enter your query (e.g., 'Generate a bar chart for countries and sales', or 'Which country has the highest sales?')")
+            if user_query:
                 # Generate the graph based on the query or handle general questions
+                generate_graph(data, user_query)
 if __name__ == "__main__":
     main()