Spaces:

DrishtiSharma
/

chat-with-patent-dataset

Build error

App Files Community

DrishtiSharma committed on Jan 26

Commit

ee80470

verified ·

1 Parent(s): 8395e91

Update lab/interim.py

Browse files

Files changed (1) hide show

lab/interim.py +45 -13

lab/interim.py CHANGED Viewed

@@ -32,6 +32,23 @@ def initialize_llm(model_choice):
 model_choice = st.radio("Select LLM", ["GPT-4o", "llama-3.3-70b"], index=0, horizontal=True)
 llm = initialize_llm(model_choice)
 def load_dataset_into_session():
     input_option = st.radio(
         "Select Dataset Input:",
@@ -43,7 +60,7 @@ def load_dataset_into_session():
         file_path = "./source/test.csv"
         if st.button("Load Dataset"):
             try:
-                st.session_state.df = pd.read_csv(file_path)
                 st.success(f"File loaded successfully from '{file_path}'!")
             except Exception as e:
                 st.error(f"Error loading dataset from the repo directory: {e}")
@@ -55,11 +72,7 @@ def load_dataset_into_session():
         )
         if st.button("Load Dataset"):
             try:
-                dataset = load_dataset(dataset_name, name="sample", split="train", trust_remote_code=True, uniform_split=True)
-                if hasattr(dataset, "to_pandas"):
-                    st.session_state.df = dataset.to_pandas()
-                else:
-                    st.session_state.df = pd.DataFrame(dataset)
                 st.success(f"Hugging Face Dataset '{dataset_name}' loaded successfully!")
             except Exception as e:
                 st.error(f"Error loading Hugging Face dataset: {e}")
@@ -69,7 +82,7 @@ def load_dataset_into_session():
         uploaded_file = st.file_uploader("Upload a CSV File:", type=["csv"])
         if uploaded_file:
             try:
-                st.session_state.df = pd.read_csv(uploaded_file)
                 st.success("File uploaded successfully!")
             except Exception as e:
                 st.error(f"Error reading uploaded file: {e}")
@@ -79,12 +92,22 @@ load_dataset_into_session()
 if "df" in st.session_state and llm:
     df = st.session_state.df
     st.write("### Dataset Preview")
-    st.dataframe(df.head(10))
     # Create SmartDataFrame
     chat_df = SmartDataframe(df, config={"llm": llm})
     st.write("### Chat with Your Patent Data")
     user_query = st.text_input("Enter your question about the patent data (e.g., 'Predict if the patent will be accepted.'):")
@@ -95,6 +118,7 @@ if "df" in st.session_state and llm:
         except Exception as e:
             st.error(f"Error: {e}")
     st.write("### Generate and View Graphs")
     plot_query = st.text_input("Enter a query to generate a graph (e.g., 'Plot the number of patents by filing year.'):")
@@ -112,21 +136,29 @@ if "df" in st.session_state and llm:
         except Exception as e:
             st.error(f"Error: {e}")
-# Instructions
 with st.sidebar:
     st.header("Instructions:")
     st.markdown(
-        "1. Select how you want to input the dataset.\n"
         "2. Upload, select, or fetch the dataset using the provided options.\n"
-        "3. Choose an LLM (Groq-based or OpenAI-based) to interact with the data.\n"
         "   - Example: 'Predict if the patent will be accepted.'\n"
         "   - Example: 'What is the primary classification of this patent?'\n"
         "   - Example: 'Summarize the abstract of this patent.'\n"
-        "4. Enter a query to generate and view graphs based on patent attributes.\n"
     )
     st.markdown("---")
     st.header("References:")
     st.markdown(
         "1. [Chat With Your CSV File With PandasAI - Prince Krampah](https://medium.com/aimonks/chat-with-your-csv-file-with-pandasai-22232a13c7b7)"
     )

 model_choice = st.radio("Select LLM", ["GPT-4o", "llama-3.3-70b"], index=0, horizontal=True)
 llm = initialize_llm(model_choice)
+# Cache dataset loading
+@st.cache_data
+def load_repo_dataset(file_path):
+    return pd.read_csv(file_path)
+@st.cache_data
+def load_huggingface_dataset(dataset_name):
+    dataset = load_dataset(dataset_name, name="all", split="train", trust_remote_code=True, uniform_split=True)
+    if hasattr(dataset, "to_pandas"):
+        return dataset.to_pandas()
+    return pd.DataFrame(dataset)
+@st.cache_data
+def load_uploaded_csv(uploaded_file):
+    return pd.read_csv(uploaded_file)
+# Dataset selection logic
 def load_dataset_into_session():
     input_option = st.radio(
         "Select Dataset Input:",
         file_path = "./source/test.csv"
         if st.button("Load Dataset"):
             try:
+                st.session_state.df = load_repo_dataset(file_path)
                 st.success(f"File loaded successfully from '{file_path}'!")
             except Exception as e:
                 st.error(f"Error loading dataset from the repo directory: {e}")
         )
         if st.button("Load Dataset"):
             try:
+                st.session_state.df = load_huggingface_dataset(dataset_name)
                 st.success(f"Hugging Face Dataset '{dataset_name}' loaded successfully!")
             except Exception as e:
                 st.error(f"Error loading Hugging Face dataset: {e}")
         uploaded_file = st.file_uploader("Upload a CSV File:", type=["csv"])
         if uploaded_file:
             try:
+                st.session_state.df = load_uploaded_csv(uploaded_file)
                 st.success("File uploaded successfully!")
             except Exception as e:
                 st.error(f"Error reading uploaded file: {e}")
 if "df" in st.session_state and llm:
     df = st.session_state.df
+    # Display dataset metadata
+    st.write("### Dataset Metadata")
+    st.text(f"Number of Rows: {df.shape[0]}")
+    st.text(f"Number of Columns: {df.shape[1]}")
+    st.text(f"Column Names: {', '.join(df.columns)}")
+    # Display dataset preview
     st.write("### Dataset Preview")
+    num_rows = st.slider("Select number of rows to display:", min_value=5, max_value=50, value=10)
+    st.dataframe(df.head(num_rows))
     # Create SmartDataFrame
     chat_df = SmartDataframe(df, config={"llm": llm})
+    # Chat functionality
     st.write("### Chat with Your Patent Data")
     user_query = st.text_input("Enter your question about the patent data (e.g., 'Predict if the patent will be accepted.'):")
         except Exception as e:
             st.error(f"Error: {e}")
+    # Plot generation functionality
     st.write("### Generate and View Graphs")
     plot_query = st.text_input("Enter a query to generate a graph (e.g., 'Plot the number of patents by filing year.'):")
         except Exception as e:
             st.error(f"Error: {e}")
+    # Download processed dataset
+    st.write("### Download Processed Dataset")
+    st.download_button(
+        label="Download Dataset as CSV",
+        data=df.to_csv(index=False),
+        file_name="processed_dataset.csv",
+        mime="text/csv"
+    )
+# Sidebar instructions
 with st.sidebar:
     st.header("Instructions:")
     st.markdown(
+        "1. Choose an LLM (Groq-based or OpenAI-based) to interact with the data.\n"
         "2. Upload, select, or fetch the dataset using the provided options.\n"
+        "3. Enter a query to generate and view graphs based on patent attributes.\n"
         "   - Example: 'Predict if the patent will be accepted.'\n"
         "   - Example: 'What is the primary classification of this patent?'\n"
         "   - Example: 'Summarize the abstract of this patent.'\n"
+        #"4. Download the processed dataset as a CSV file."
     )
     st.markdown("---")
     st.header("References:")
     st.markdown(
         "1. [Chat With Your CSV File With PandasAI - Prince Krampah](https://medium.com/aimonks/chat-with-your-csv-file-with-pandasai-22232a13c7b7)"
     )