Spaces:

MusoraProductDepartment
/

Musolyze

Sleeping

App Files Files Community

Danialebrat committed on Mar 14, 2025

Commit

fe8a467

1 Parent(s): 806f16d

adding codes and files

Browse files

Files changed (13) hide show

.dockerignore +5 -0
.gitignore +5 -0
README.md +2 -14
SmartQuery.py +190 -0
SmartQuery_GC.py +190 -0
access.json +3 -0
app.py +210 -2
auth.py +14 -0
chat_ui.py +55 -0
local_app.py +211 -0
style.css +47 -0
table_config.json +23 -0
utils.py +35 -0

.dockerignore ADDED Viewed

	@@ -0,0 +1,5 @@

+# Ignore the .streamlit directory and its contents
+.streamlit/
+# Ignore the .env file
+.env

.gitignore ADDED Viewed

	@@ -0,0 +1,5 @@

+# Ignore the .streamlit directory and its contents
+.streamlit/
+# Ignore the .env file
+.env

README.md CHANGED Viewed

@@ -1,14 +1,2 @@
----
-title: Musolyze
-emoji: 🚀
-colorFrom: purple
-colorTo: red
-sdk: streamlit
-sdk_version: 1.43.2
-app_file: app.py
-pinned: false
-license: apache-2.0
-short_description: Analyzing Musora databases using natural language
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference


1	+ # SmartQuery
2	+ Ask questions about your data in natural language

SmartQuery.py ADDED Viewed

	@@ -0,0 +1,190 @@

+from pandasai.llm import OpenAI
+from pandasai import Agent
+from pandasai import SmartDataframe, SmartDatalake
+from pandasai.responses.response_parser import ResponseParser
+from pandasai.responses.streamlit_response import StreamlitResponse
+from snowflake.snowpark import Session
+import json
+import pandas as pd
+from sqlalchemy import create_engine
+import os
+from dotenv import load_dotenv
+import streamlit as st
+load_dotenv()
+# -----------------------------------------------------------------------
+# Copy the PandasAI API key from the Streamlit secrets store into the process environment.
+key = st.secrets["pandasai"]["PANDASAI_API_KEY"]
+os.environ['PANDASAI_API_KEY'] = key
+# Module-level OpenAI LLM handle; the SmartQuery class below passes it to PandasAI in its config.
+openai_llm = OpenAI(
+    api_token=st.secrets["openai"]["OPENAI_API"]
+)
+# -----------------------------------------------------------------------
+# -----------------------------------------------------------------------
+class SmartQuery:
+    """
+    class for interacting with dataframes using Natural Language
+    """
+    def __init__(self):
+        with open("table_config.json", "r") as f:
+            self.config = json.load(f)
+    def perform_query_on_dataframes(self, query, *dataframes, response_format=None):
+        """
+        Performs a user-defined query on given pandas DataFrames using PandasAI.
+        Parameters:
+        - query (str): The user's query or instruction.
+        - *dataframes (pd.DataFrame): Any number of pandas DataFrames.
+        Returns:
+        - The result of the query executed by PandasAI.
+        """
+        dataframe_list = list(dataframes)
+        num_dataframes = len(dataframe_list)
+        config = {"llm": openai_llm, "verbose": True, "security": "none", "response_parser": OutputParser}
+        if num_dataframes == 1:
+            result = self.query_single_dataframe(query, dataframe_list[0], config)
+        else:
+            result = self.query_multiple_dataframes(query, dataframe_list, config)
+        return result
+    def query_single_dataframe(self, query, dataframe, config):
+        agent = Agent(dataframe, config=config)
+        response = agent.chat(query)
+        return response
+    def query_multiple_dataframes(self, query, dataframe_list, config):
+        agent = SmartDatalake(dataframe_list, config=config)
+        response = agent.chat(query)
+        return response
+    # -----------------------------------------------------------------------
+    def snowflake_connection(self):
+        """
+        setting snowflake connection
+        :return:
+        """
+        conn = {
+            "user": os.environ.get("snowflake_user"),
+            "password": os.environ.get("snowflake_password"),
+            "account": os.environ.get("snowflake_account"),
+            "role": os.environ.get("snowflake_role"),
+            "database": os.environ.get("snowflake_database"),
+            "warehouse": os.environ.get("snowflake_warehouse"),
+            "schema": os.environ.get("snowflake_schema")
+        }
+        try:
+            session = Session.builder.configs(conn).create()
+            return session
+        except Exception as e:
+            print(f"Error creating Snowflake session: {e}")
+            raise e
+    # ----------------------------------------------------------------------------------------------------
+    def read_snowflake_table(self, session, table_name, brand):
+        """
+        reading tables from snowflake
+        :param dataframe:
+        :return:
+        """
+        query = self._get_query(table_name, brand)
+        # Connect to Snowflake
+        try:
+            dataframe = session.sql(query).to_pandas()
+            dataframe.columns = dataframe.columns.str.lower()
+            print(f"reading content table successfully")
+            return dataframe
+        except Exception as e:
+            print(f"Error in reading table: {e}")
+    # ----------------------------------------------------------------------------------------------------
+    def _get_query(self, table_name: str, brand: str) -> str:
+        # Retrieve the base query template for the given table name
+        base_query = self.config[table_name]["query"]
+        # Insert the brand condition into the query
+        query = base_query.format(brand=brand.lower())
+        return query
+    # ----------------------------------------------------------------------------------------------------
+    def mysql_connection(self):
+        # Setting up the MySQL connection parameters
+        user = os.environ.get("mysql_user")
+        password = os.environ.get("mysql_password")
+        host = os.environ.get("mysql_source")
+        database = os.environ.get("mysql_schema")
+        try:
+            engine = create_engine(f"mysql+pymysql://{user}:{password}@{host}/{database}")
+            return engine
+        except Exception as e:
+            print(f"Error creating MySQL engine: {e}")
+            raise e
+    # ----------------------------------------------------------------------------------------------------
+    def read_mysql_table(self, engine, table_name, brand):
+        query = self._get_query(table_name, brand)
+        with engine.connect() as conn:
+            dataframe = pd.read_sql_query(query, conn)
+        # Convert all column names to lowercase if not
+        dataframe.columns = dataframe.columns.str.lower()
+        return dataframe
+# ----------------------------------------------------------------------------------------------------
+# ----------------------------------------------------------------------------------------------------
+class OutputParser(ResponseParser):
+    def __init__(self, context) -> None:
+        super().__init__(context)
+    def parse(self, result):
+        return result
+    # ----------------------------------------------------------------------------------------------------
+if __name__ == "__main__":
+    query_multi = "get top 5 contents that had the most interactions and their 'content_type' is 'song'. Also include the number of interaction for these contents"
+    query = "select the comments that was on 'pack-bundle-lesson' content_type and have more than 10 likes"
+    query2 = "what is the number of likes, content_title and content_description for the content that received the most comments? "
+    dataframe_path = "data/recent_comment_test.csv"
+    dataframe1 = pd.read_csv(dataframe_path)
+    sq = SmartQuery()
+    interactions_path = "DBT_ANALYTICS.CORE.FCT_CONTENT_INTERACTIONS"
+    content_path = "DBT_ANALYTICS.CORE.DIM_CONTENT"
+    session = sq.snowflake_connection()
+    interactions_df = sq.read_snowflake_table(session, table_name="interactions", brand="drumeo")
+    content_df = sq.read_snowflake_table(session, table_name="contents", brand="drumeo")
+    # single dataframe
+    # result = sq.perform_query_on_dataframes(query, dataframe, response_format="dataframe")
+    # multiple dataframe
+    result = sq.perform_query_on_dataframes(query_multi, interactions_df, content_df, response_format="dataframe")
+    print(result)

SmartQuery_GC.py ADDED Viewed

	@@ -0,0 +1,190 @@

+# this class will use env variables to read secrets from google cloud
+from pandasai import Agent
+from pandasai import SmartDataframe, SmartDatalake
+from pandasai.responses.response_parser import ResponseParser
+from pandasai.llm.openai import OpenAI
+from pandasai.responses.streamlit_response import StreamlitResponse
+import pymysql
+from pandasai.connectors import PandasConnector
+from snowflake.snowpark import Session
+import json
+import pandas as pd
+from sqlalchemy import create_engine
+import os
+import streamlit as st
+from dotenv import load_dotenv
+load_dotenv()
+import datetime
+# -----------------------------------------------------------------------
+key = os.environ.get("PANDASAI_API_KEY")
+os.environ['PANDASAI_API_KEY'] = key
+# openai_llm = OpenAI(api_key=os.environ.get("OPENAI_API"))
+openai_llm = OpenAI(
+    api_token=os.environ.get("OPENAI_API")
+)
+# -----------------------------------------------------------------------
+class SmartQuery:
+    """
+    class for interacting with dataframes using Natural Language
+    """
+    def __init__(self):
+        with open("table_config.json", "r") as f:
+            self.config = json.load(f)
+    def perform_query_on_dataframes(self, query, *dataframes, response_format=None):
+        """
+        Performs a user-defined query on given pandas DataFrames using PandasAI.
+        Parameters:
+        - query (str): The user's query or instruction.
+        - *dataframes (pd.DataFrame): Any number of pandas DataFrames.
+        Returns:
+        - The result of the query executed by PandasAI.
+        """
+        dataframe_list = list(dataframes)
+        num_dataframes = len(dataframe_list)
+        config = {"llm": openai_llm, "verbose": True, "security": "none", "response_parser": OutputParser}
+        if num_dataframes == 1:
+            result = self.query_single_dataframe(query, dataframe_list[0], config)
+        else:
+            result = self.query_multiple_dataframes(query, dataframe_list, config)
+        return result
+    def query_single_dataframe(self, query, dataframe, config):
+        agent = Agent(dataframe, config=config)
+        response = agent.chat(query)
+        return response
+    def query_multiple_dataframes(self, query, dataframe_list, config):
+        agent = SmartDatalake(dataframe_list, config=config)
+        response = agent.chat(query)
+        return response
+    # -----------------------------------------------------------------------
+    def snowflake_connection(self):
+        """
+        setting snowflake connection
+        :return:
+        """
+        conn = {
+            "user": os.environ.get("snowflake_user"),
+            "password": os.environ.get("snowflake_password"),
+            "account": os.environ.get("snowflake_account"),
+            "role": os.environ.get("snowflake_role"),
+            "database": os.environ.get("snowflake_database"),
+            "warehouse": os.environ.get("snowflake_warehouse"),
+            "schema": os.environ.get("snowflake_schema")
+        }
+        try:
+            session = Session.builder.configs(conn).create()
+            return session
+        except Exception as e:
+            print(f"Error creating Snowflake session: {e}")
+            raise e
+    # ----------------------------------------------------------------------------------------------------
+    def read_snowflake_table(self, session, table_name, brand):
+        """
+        reading tables from snowflake
+        :param dataframe:
+        :return:
+        """
+        query = self._get_query(table_name, brand)
+        # Connect to Snowflake
+        try:
+            dataframe = session.sql(query).to_pandas()
+            dataframe.columns = dataframe.columns.str.lower()
+            print(f"reading content table successfully")
+            return dataframe
+        except Exception as e:
+            print(f"Error in reading table: {e}")
+    # ----------------------------------------------------------------------------------------------------
+    def _get_query(self, table_name: str, brand: str) -> str:
+        # Retrieve the base query template for the given table name
+        base_query = self.config[table_name]["query"]
+        # Insert the brand condition into the query
+        query = base_query.format(brand=brand.lower())
+        return query
+    # ----------------------------------------------------------------------------------------------------
+    def mysql_connection(self):
+        # Setting up the MySQL connection parameters
+        user = os.environ.get("mysql_user")
+        password = os.environ.get("mysql_password")
+        host = os.environ.get("mysql_source")
+        database = os.environ.get("mysql_schema")
+        try:
+            engine = create_engine(f"mysql+pymysql://{user}:{password}@{host}/{database}")
+            return engine
+        except Exception as e:
+            print(f"Error creating MySQL engine: {e}")
+            raise e
+    # ----------------------------------------------------------------------------------------------------
+    def read_mysql_table(self, engine, table_name, brand):
+        query = self._get_query(table_name, brand)
+        with engine.connect() as conn:
+            dataframe = pd.read_sql_query(query, conn)
+        # Convert all column names to lowercase if not
+        dataframe.columns = dataframe.columns.str.lower()
+        return dataframe
+# ----------------------------------------------------------------------------------------------------
+# ----------------------------------------------------------------------------------------------------
+class OutputParser(ResponseParser):
+    def __init__(self, context) -> None:
+        super().__init__(context)
+    def parse(self, result):
+        return result
+    # ----------------------------------------------------------------------------------------------------
+if __name__ == "__main__":
+    # query_multi = "get top 5 contents that had the most interactions and their 'content_type' is 'song'. Also include the number of interaction for these contents"
+    # query = "select the comments that was on 'pack-bundle-lesson' content_type and have more than 10 likes"
+    # query2 = "what is the number of likes, content_title and content_description for the content that received the most comments? "
+    # query = "how many users do we have with 0 experience level?"
+    query = "select song content_type that have difficulty range of 0-3?"
+    #
+    # dataframe_path = "data/recent_comment_test.csv"
+    #
+    # dataframe1 = pd.read_csv(dataframe_path)
+    #
+    sq = SmartQuery()
+    session = sq.snowflake_connection()
+    dataframe = sq.read_snowflake_table(session, table_name="contents", brand="drumeo")
+    result = sq.perform_query_on_dataframes(query, dataframe)
+    print(result)

access.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+  "email": ["danial@musora.com", "danial.ebrat@gmail.com"]
+}

app.py CHANGED Viewed

@@ -1,4 +1,212 @@
 import streamlit as st
-x = st.slider('Select a value PLEASE')
-st.write(x, 'squared is', x * x)

+import os
 import streamlit as st
+from dotenv import load_dotenv
+import pandas as pd
+# Local imports
+from auth import authenticator
+from utils import load_table_config, load_uploaded_files, display_table_descriptions
+# from SmartQuery_GC import SmartQuery
+from SmartQuery import SmartQuery
+# If you use chat_ui.py:
+from chat_ui import display_chat
+load_dotenv()
+# -----------------------------------------------------------------------
+# Set page config
+st.set_page_config(
+    page_title="MusoLyze",
+    page_icon="🤖",
+    layout="wide",
+    initial_sidebar_state="expanded",
+)
+# -----------------------------------------------------------------------
+# Constants
+# AUTH_TOKEN = os.environ.get("AUTH_TOKEN")
+# Shared login token, read from the Streamlit secrets store.
+AUTH_TOKEN = st.secrets["token"]["AUTH_TOKEN"]
+# Paths are relative to the working directory the app is started from.
+ACCESS_JSON_PATH = "access.json"
+TABLE_CONFIG_PATH = "table_config.json"
+CSS_PATH = "style.css"
+# Inline the stylesheet into the page through a <style> tag.
+with open(CSS_PATH, "r") as f:
+    css_text = f.read()
+    st.markdown(f"<style>{css_text}</style>", unsafe_allow_html=True)
+# -----------------------------------------------------------------------
+# Initialize Session State
+if "authenticated" not in st.session_state:
+    st.session_state["authenticated"] = False
+if "history" not in st.session_state:
+    st.session_state["history"] = []
+if "dataframes" not in st.session_state:
+    st.session_state["dataframes"] = []
+if "brand" not in st.session_state:
+    st.session_state["brand"] = None
+# NEW: Track the previous selection of brand, tables, and uploaded file names.
+if "previous_selection" not in st.session_state:
+    st.session_state["previous_selection"] = {
+        "brand": None,
+        "tables": [],
+        "uploaded_files": []
+    }
+# -----------------------------------------------------------------------
+# LOGIN PAGE
+if not st.session_state["authenticated"]:
+    st.markdown('<div class="login-container">', unsafe_allow_html=True)
+    st.markdown("## MusoLyze Login")
+    st.write("Please enter your email and authentication token to proceed.")
+    email = st.text_input("Email", placeholder="john.doe@example.com")
+    token = st.text_input("Token", type="password", placeholder="Enter your token")
+    if st.button("Log In"):
+        if authenticator(email, token, AUTH_TOKEN, ACCESS_JSON_PATH):
+            st.session_state["authenticated"] = True
+            st.success("Logged in successfully!")
+            st.stop()  # Force the script to end; next run user is authenticated.
+        else:
+            st.error("Invalid email or token. Please try again.")
+    st.markdown('</div>', unsafe_allow_html=True)
+    st.stop()  # Stop execution so the rest of the page is not shown.
+# -----------------------------------------------------------------------
+# Main App: Load Data, Show Chat
+st.title("💬 MusoLyze")
+# SmartQuery instance (created on every script run; its constructor loads table_config.json)
+sq = SmartQuery()
+# Load config file for database tables
+table_config = load_table_config(TABLE_CONFIG_PATH)
+# Sidebar for file upload and table selection
+st.sidebar.title("Data Selection")
+# 1. File upload
+uploaded_files = st.sidebar.file_uploader(
+    "Upload CSV or Excel files",
+    type=['csv', 'xlsx', 'xls'],
+    accept_multiple_files=True
+)
+# 2. Brand selection (the value is passed to the table readers when data is loaded)
+brand = st.sidebar.selectbox("Choose your brand.", ["drumeo", "guitareo", "pianote", "singeo"])
+st.session_state.brand = brand
+# 3. Table selection: the options are the top-level keys of the table config
+db_tables = st.sidebar.multiselect(
+    "Select tables from database",
+    options=list(table_config.keys()),
+    help="Select one or more tables to include in your data."
+)
+# Show table descriptions if user has selected any
+display_table_descriptions(db_tables, table_config)
+# 'Load Data' button
+if st.sidebar.button("Load Data"):
+    # 1) Build the new selection object to compare with previous_selection.
+    new_selection = {
+        "brand": brand,
+        "tables": db_tables,
+        "uploaded_files": [f.name for f in uploaded_files] if uploaded_files else []
+    }
+    # 2) Compare new selection with old selection; if changed, reset history.
+    if new_selection != st.session_state["previous_selection"]:
+        st.session_state["history"] = []
+    # 3) Proceed with loading data
+    dataframes = []
+    # Load from uploaded files
+    if uploaded_files:
+        dataframes.extend(load_uploaded_files(uploaded_files))
+    # Load dataframes from selected tables
+    if db_tables:
+        for table_name in db_tables:
+            table_info = table_config[table_name]
+            source = table_info["source"]
+            try:
+                if source == 'Snowflake':
+                    session = sq.snowflake_connection()
+                    df = sq.read_snowflake_table(session, table_name, st.session_state.brand)
+                elif source == 'MySQL':
+                    engine = sq.mysql_connection()
+                    df = sq.read_mysql_table(engine, table_name, st.session_state.brand)
+                dataframes.append(df)
+            except Exception as e:
+                st.error(f"Error loading table {table_name}: {e}")
+    st.session_state['dataframes'] = dataframes
+    # 4) Update previous_selection in session state
+    st.session_state["previous_selection"] = new_selection
+    st.success("Data loaded successfully!")
+# --------------------------------------------------------------------------
+# If no data is loaded, warn and stop
+if not st.session_state['dataframes']:
+    st.warning("Please upload at least one file or select a table from the database, then click 'Load Data'.")
+    st.stop()
+# **Always** display top 5 rows of each DataFrame if data is loaded
+for idx, df in enumerate(st.session_state['dataframes']):
+    st.markdown(f"**Preview of loaded data:**")
+    st.dataframe(df.head(5))
+# --- Chat Display Section ---
+display_chat(st.session_state['history'])
+# --- User Input Section ---
+st.markdown("---")
+with st.form(key="user_query_form"):
+    user_query = st.text_input(
+        "Ask a question about your data:",
+        placeholder="Type your question and press Enter..."
+    )
+    send_button = st.form_submit_button("Send")
+if send_button and user_query.strip():
+    with st.spinner("Analyzing your data..."):
+        try:
+            response = sq.perform_query_on_dataframes(user_query, *st.session_state['dataframes'])
+            if response['type'] == "dataframe":
+                df = response['value']
+                st.session_state['history'].append({
+                    'user': user_query,
+                    'type': 'dataframe',
+                    'bot': df  # store the actual DataFrame
+                })
+            elif response['type'] == "plot":
+                plot_image = response['value']
+                st.session_state['history'].append({
+                    'user': user_query,
+                    'type': 'plot',
+                    'bot': plot_image
+                })
+            else:  # string or any other text
+                text_response = response['value']
+                st.session_state['history'].append({
+                    'user': user_query,
+                    'type': 'string',
+                    'bot': text_response
+                })
+            # Rerun to refresh page and clear input
+            st.rerun()
+        except Exception as e:
+            st.error(f"Error: {e}")
+elif send_button and not user_query.strip():
+    st.warning("Please enter a question before sending.")

auth.py ADDED Viewed

	@@ -0,0 +1,14 @@

+import json
+import os
+def load_access_json(file_path: str) -> dict:
+    """Load the JSON file containing the allowed emails."""
+    with open(file_path, 'r') as f:
+        return json.load(f)
+def authenticator(email: str, token: str, auth_token: str, access_json_path: str) -> bool:
+    """Check if the provided email and token are valid."""
+    emails_data = load_access_json(access_json_path)
+    email_list = emails_data["email"]
+    return (email.lower() in email_list) and (token == auth_token)

chat_ui.py ADDED Viewed

	@@ -0,0 +1,55 @@

+import streamlit as st
+import pandas as pd
+def display_chat(history):
+    """Renders the chat history with custom bubbles for each message."""
+    chat_container = st.container()
+    with chat_container:
+        for idx, chat in enumerate(history):
+            # --- User message ---
+            st.markdown(
+                f"""
+                <div class="chat-bubble user-bubble">
+                    <strong>You:</strong> {chat['user']}
+                </div>
+                """,
+                unsafe_allow_html=True
+            )
+            # --- Bot bubble: use the 'type' key to decide how to render ---
+            st.markdown(
+                """
+                <div class="chat-bubble bot-bubble">
+                <strong>Bot:</strong>
+                """,
+                unsafe_allow_html=True,
+            )
+            response_type = chat.get('type', 'string')  # default to 'string'
+            bot_response = chat['bot']
+            if response_type == 'dataframe' and isinstance(bot_response, pd.DataFrame):
+                # Show top 5 rows
+                df_to_display = bot_response
+                if len(df_to_display) > 5:
+                    st.info("Showing the first 5 rows of the DataFrame.")
+                st.dataframe(df_to_display.head(5))
+                # Provide a CSV download
+                csv_data = df_to_display.to_csv(index=False).encode('utf-8')
+                st.download_button(
+                    label="Download data as CSV",
+                    data=csv_data,
+                    file_name=f'result_{idx+1}.csv',
+                    mime='text/csv',
+                    key=f'download_{idx}'
+                )
+            elif response_type == 'plot':
+                # If it's an image object (e.g., PIL Image), show it
+                st.image(bot_response, use_container_width=True)
+            else:  # "string" or any other text
+                st.markdown(f"{bot_response}", unsafe_allow_html=True)
+            st.markdown("</div>", unsafe_allow_html=True)

local_app.py ADDED Viewed

	@@ -0,0 +1,211 @@

+import os
+import streamlit as st
+from dotenv import load_dotenv
+import pandas as pd
+# Local imports
+from auth import authenticator
+from utils import load_table_config, load_uploaded_files, display_table_descriptions
+# from SmartQuery_GC import SmartQuery
+from SmartQuery import SmartQuery
+# If you use chat_ui.py:
+from chat_ui import display_chat
+load_dotenv()
+# -----------------------------------------------------------------------
+# Set page config
+st.set_page_config(
+    page_title="MusoLyze",
+    page_icon="🤖",
+    layout="wide",
+    initial_sidebar_state="expanded",
+)
+# -----------------------------------------------------------------------
+# Constants
+# Shared login token, read from the environment; it is None when AUTH_TOKEN is not set.
+AUTH_TOKEN = os.environ.get("AUTH_TOKEN")
+# Paths are relative to the working directory the app is started from.
+ACCESS_JSON_PATH = "access.json"
+TABLE_CONFIG_PATH = "table_config.json"
+CSS_PATH = "style.css"
+# Inline the stylesheet into the page through a <style> tag.
+with open(CSS_PATH, "r") as f:
+    css_text = f.read()
+    st.markdown(f"<style>{css_text}</style>", unsafe_allow_html=True)
+# -----------------------------------------------------------------------
+# Initialize Session State
+if "authenticated" not in st.session_state:
+    st.session_state["authenticated"] = False
+if "history" not in st.session_state:
+    st.session_state["history"] = []
+if "dataframes" not in st.session_state:
+    st.session_state["dataframes"] = []
+if "brand" not in st.session_state:
+    st.session_state["brand"] = None
+# NEW: Track the previous selection of brand, tables, and uploaded file names.
+if "previous_selection" not in st.session_state:
+    st.session_state["previous_selection"] = {
+        "brand": None,
+        "tables": [],
+        "uploaded_files": []
+    }
+# -----------------------------------------------------------------------
+# LOGIN PAGE
+if not st.session_state["authenticated"]:
+    st.markdown('<div class="login-container">', unsafe_allow_html=True)
+    st.markdown("## MusoLyze Login")
+    st.write("Please enter your email and authentication token to proceed.")
+    email = st.text_input("Email", placeholder="john.doe@example.com")
+    token = st.text_input("Token", type="password", placeholder="Enter your token")
+    if st.button("Log In"):
+        if authenticator(email, token, AUTH_TOKEN, ACCESS_JSON_PATH):
+            st.session_state["authenticated"] = True
+            st.success("Logged in successfully!")
+            st.stop()  # Force the script to end; next run user is authenticated.
+        else:
+            st.error("Invalid email or token. Please try again.")
+    st.markdown('</div>', unsafe_allow_html=True)
+    st.stop()  # Stop execution so the rest of the page is not shown.
+# -----------------------------------------------------------------------
+# Main App: Load Data, Show Chat
+st.title("💬 MusoLyze")
+# SmartQuery instance (created on every script run; its constructor loads table_config.json)
+sq = SmartQuery()
+# Load config file for database tables
+table_config = load_table_config(TABLE_CONFIG_PATH)
+# Sidebar for file upload and table selection
+st.sidebar.title("Data Selection")
+# 1. File upload
+uploaded_files = st.sidebar.file_uploader(
+    "Upload CSV or Excel files",
+    type=['csv', 'xlsx', 'xls'],
+    accept_multiple_files=True
+)
+# 2. Brand selection (the value is passed to the table readers when data is loaded)
+brand = st.sidebar.selectbox("Choose your brand.", ["drumeo", "guitareo", "pianote", "singeo"])
+st.session_state.brand = brand
+# 3. Table selection: the options are the top-level keys of the table config
+db_tables = st.sidebar.multiselect(
+    "Select tables from database",
+    options=list(table_config.keys()),
+    help="Select one or more tables to include in your data."
+)
+# Show table descriptions if user has selected any
+display_table_descriptions(db_tables, table_config)
+# 'Load Data' button
+if st.sidebar.button("Load Data"):
+    # 1) Build the new selection object to compare with previous_selection.
+    new_selection = {
+        "brand": brand,
+        "tables": db_tables,
+        "uploaded_files": [f.name for f in uploaded_files] if uploaded_files else []
+    }
+    # 2) Compare new selection with old selection; if changed, reset history.
+    if new_selection != st.session_state["previous_selection"]:
+        st.session_state["history"] = []
+    # 3) Proceed with loading data
+    dataframes = []
+    # Load from uploaded files
+    if uploaded_files:
+        dataframes.extend(load_uploaded_files(uploaded_files))
+    # Load dataframes from selected tables
+    if db_tables:
+        for table_name in db_tables:
+            table_info = table_config[table_name]
+            source = table_info["source"]
+            try:
+                if source == 'Snowflake':
+                    session = sq.snowflake_connection()
+                    df = sq.read_snowflake_table(session, table_name, st.session_state.brand)
+                elif source == 'MySQL':
+                    engine = sq.mysql_connection()
+                    df = sq.read_mysql_table(engine, table_name, st.session_state.brand)
+                dataframes.append(df)
+            except Exception as e:
+                st.error(f"Error loading table {table_name}: {e}")
+    st.session_state['dataframes'] = dataframes
+    # 4) Update previous_selection in session state
+    st.session_state["previous_selection"] = new_selection
+    st.success("Data loaded successfully!")
+# --------------------------------------------------------------------------
+# If no data is loaded, warn and stop
+if not st.session_state['dataframes']:
+    st.warning("Please upload at least one file or select a table from the database, then click 'Load Data'.")
+    st.stop()
+# **Always** display top 5 rows of each DataFrame if data is loaded
+for idx, df in enumerate(st.session_state['dataframes']):
+    st.markdown(f"**Preview of loaded data:**")
+    st.dataframe(df.head(5))
+# --- Chat Display Section ---
+display_chat(st.session_state['history'])
+# --- User Input Section ---
+st.markdown("---")
+with st.form(key="user_query_form"):
+    user_query = st.text_input(
+        "Ask a question about your data:",
+        placeholder="Type your question and press Enter..."
+    )
+    send_button = st.form_submit_button("Send")
+if send_button and user_query.strip():
+    with st.spinner("Analyzing your data..."):
+        try:
+            response = sq.perform_query_on_dataframes(user_query, *st.session_state['dataframes'])
+            if response['type'] == "dataframe":
+                df = response['value']
+                st.session_state['history'].append({
+                    'user': user_query,
+                    'type': 'dataframe',
+                    'bot': df  # store the actual DataFrame
+                })
+            elif response['type'] == "plot":
+                plot_image = response['value']
+                st.session_state['history'].append({
+                    'user': user_query,
+                    'type': 'plot',
+                    'bot': plot_image
+                })
+            else:  # string or any other text
+                text_response = response['value']
+                st.session_state['history'].append({
+                    'user': user_query,
+                    'type': 'string',
+                    'bot': text_response
+                })
+            # Rerun to refresh page and clear input
+            st.rerun()
+        except Exception as e:
+            st.error(f"Error: {e}")
+elif send_button and not user_query.strip():
+    st.warning("Please enter a question before sending.")

style.css ADDED Viewed

	@@ -0,0 +1,47 @@

+/* Base Theme */
+/* Black page background with gold (#FFD700) text. */
+body {
+    background-color: #000000;
+    color: #FFD700;
+}
+/* Buttons inside .stButton: gold background, black text. */
+/* NOTE(review): .stButton / .stTextInput / .stSidebar are presumably Streamlit's
+   generated class names; verify against the Streamlit version in use. */
+.stButton>button {
+    background-color: #FFD700;
+    color: #000000;
+}
+/* Text inputs: gold text and a gold border (forced with !important). */
+.stTextInput>div>div>input {
+    color: #FFD700;
+    border-color: #FFD700 !important;
+}
+/* Sidebar: dark gray background. */
+.stSidebar {
+    background-color: #1E1E1E;
+}
+/* Center the login container */
+/* max-width plus "margin: 0 auto" centers the box horizontally. */
+.login-container {
+    max-width: 400px;
+    margin: 0 auto;
+    padding: 2rem;
+    background-color: #1E1E1E;
+    border-radius: 10px;
+}
+.login-container h2 {
+    text-align: center;
+}
+/* Chat-like bubbles */
+/* Shared bubble shape: padded, rounded, at most 80% wide, long words wrap. */
+.chat-bubble {
+    padding: 10px;
+    border-radius: 10px;
+    margin: 5px 0;
+    max-width: 80%;
+    word-wrap: break-word;
+}
+/* User bubble: dark background with a gold outline. */
+/* NOTE(review): align-self only takes effect if the parent is a flex or grid
+   container; confirm the chat wrapper sets one. */
+.user-bubble {
+    background-color: #1E1E1E;
+    border: 1px solid #FFD700;
+    align-self: flex-start;
+}
+/* Bot bubble: inverted colors (gold background, black text). */
+.bot-bubble {
+    background-color: #FFD700;
+    color: #000;
+    align-self: flex-end;
+}

table_config.json ADDED Viewed

	@@ -0,0 +1,23 @@

+{
+  "interactions":
+    {
+      "description": "This table contains interaction history of the users with all the Musora content.",
+      "source": "Snowflake",
+      "cols": ["user_id", "content_id", "brand","TIMESTAMP", "EVENT_TEXT", "CONTENT_TYPE", "DIFFICULTY"],
+      "query": "select * from ONLINE_RECSYS.PREPROCESSED.RECSYS_INTEACTIONS where brand = '{brand}'"
+    },
+  "contents":
+    {
+      "description": "This table contains information about Musora contents.",
+      "source": "Snowflake",
+      "cols":  ["content_id", "brand", "content_title", "content_type", "content_description", "artist", "difficulty", "STYLE", "TOPIC","published_at"],
+      "query": "select * from ONLINE_RECSYS.PREPROCESSED.CONTENTS where brand = '{brand}'"
+    },
+  "users":
+    {
+      "description": "This table contains information about Musora users.",
+      "source": "Snowflake",
+      "cols":  ["USER_ID", "BRAND", "DIFFICULTY", "SELF_REPORT_DIFFICULTY", "USER_PROFILE", "PERMISSION","EXPIRATION_DATE"],
+      "query": "select * from ONLINE_RECSYS.PREPROCESSED.USERS where brand = '{brand}'"
+    }
+}

utils.py ADDED Viewed

	@@ -0,0 +1,35 @@

+import pandas as pd
+import streamlit as st
+import json
+def load_table_config(file_path: str) -> dict:
+    """Load the table configuration JSON."""
+    with open(file_path, 'r') as f:
+        return json.load(f)
+def load_uploaded_files(uploaded_files):
+    """
+    Load dataframes from the uploaded files (CSV/Excel).
+    Returns a list of pandas DataFrames.
+    """
+    dataframes = []
+    for file in uploaded_files:
+        if file.name.endswith('.csv'):
+            df = pd.read_csv(file)
+        else:
+            df = pd.read_excel(file)
+        dataframes.append(df)
+    return dataframes
+def display_table_descriptions(selected_tables, table_config):
+    """
+    Given a list of selected table names and the table config,
+    write out their descriptions in the sidebar.
+    """
+    if selected_tables:
+        st.sidebar.subheader("Table Descriptions")
+        for table_name in selected_tables:
+            description = table_config[table_name].get('description', "No description available.")
+            cols = table_config[table_name].get('cols', [])
+            st.sidebar.markdown(f"**{table_name}**: {description}")
+            st.sidebar.markdown(f"**Available columns**: {cols}")