Spaces:

Quantilytix
/

Talk2Docs

Sleeping

App Files Files Community

rairo committed on Feb 5, 2025

Commit

3a9cf4b

verified ·

1 Parent(s): bb94e23

Update app.py

Browse files

Files changed (1) hide show

app.py +155 -80

app.py CHANGED Viewed

@@ -9,7 +9,7 @@ from langchain.chains import ConversationalRetrievalChain
 import os
 import pandas as pd
 from pandasai import SmartDataframe, SmartDatalake
-from pandasai.responses.response_parser import  ResponseParser
 from pandasai.llm import GoogleGemini
 import plotly.graph_objects as go
 from PIL import Image
@@ -17,33 +17,67 @@ import io
 import base64
 class StreamLitResponse(ResponseParser):
-        def __init__(self,context) -> None:
-              super().__init__(context)
-        def format_dataframe(self,result):
-               st.dataframe(result['value'])
-               return
-        def format_plot(self,result):
-               st.image(result['value'])
-               return
-        def format_other(self, result):
-               st.write(result['value'])
-               return
-load_dotenv()  # Load environment variables at the beginning
-GOOGLE_API_KEY = os.environ.get('GOOGLE_API_KEY') #Use .get to handle if the variable is not present
 if not GOOGLE_API_KEY:
     st.error("GOOGLE_API_KEY environment variable not set.")
     st.stop()
 def generateResponse(prompt, dfs):
     llm = GoogleGemini(api_key=GOOGLE_API_KEY)
-    pandas_agent = SmartDataframe(dfs,config={"llm":llm, "response_parser":StreamLitResponse})
-    answer = pandas_agent.chat(prompt)
-    return answer
 # Processing pdfs
 def get_pdf_text(pdf_docs):
     text = ""
@@ -72,46 +106,6 @@ def get_vectorstore(text_chunks):
     vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
     return vectorstore
-#handle user input
-def handle_userinput(question, pdf_vectorstore, dfs):
-    if pdf_vectorstore and st.session_state.conversation:
-        response = st.session_state.conversation({"question": question})
-        st.session_state.chat_history.append({"role": "user", "content": question})
-        assistant_response = response.get('answer')
-        if assistant_response:  # Check if assistant_response is not None or empty
-            st.session_state.chat_history.append({"role": "assistant", "content": assistant_response})  # Directly add string
-        st.rerun()
-    elif dfs:  # PandasAI
-        assistant_response = generateResponse(question, dfs)  # Get the single response
-        st.session_state.chat_history.append({"role": "user", "content": question})
-        if assistant_response:  # Check if assistant_response is not None or empty
-            if isinstance(assistant_response, dict) and 'value' in assistant_response:
-                content_type = assistant_response.get('type')
-                content_value = assistant_response.get('value')
-                if content_type == "dataframe":
-                    st.session_state.chat_history.append({"role": "assistant", "content": "DataFrame"})
-                    st.session_state.chat_history.append({"role": "assistant", "dataframe": content_value})
-                elif content_type == "plot":
-                    st.session_state.chat_history.append({"role": "assistant", "content": "Plot"})
-                    st.session_state.chat_history.append({"role": "assistant", "plot": content_value})
-                else:  # Text or other
-                    st.session_state.chat_history.append({"role": "assistant", "content": assistant_response})
-            else:  # Text or other (including None if that's what it is)
-                st.session_state.chat_history.append({"role": "assistant", "content": assistant_response})
-        st.rerun()
-        return  # Exit early after PandasAI processing
-    else:
-        st.write("Please upload and process your documents/data first.")
 def get_conversation_chain(vectorstore):
     llm = ChatGoogleGenerativeAI(model='gemini-2.0-flash-exp')
     memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
@@ -122,9 +116,99 @@ def get_conversation_chain(vectorstore):
     )
     return conversation_chain
 def main():
     st.set_page_config(page_title="Chat with PDFs and Data", page_icon=":books:")
     if "conversation" not in st.session_state:
         st.session_state.conversation = None
     if "chat_history" not in st.session_state:
@@ -136,36 +220,24 @@ def main():
     st.title("Chat with PDFs and Data :books: :bar_chart:")
-    # Chat display
     for message in st.session_state.chat_history:
         with st.chat_message(message["role"]):
-            if "dataframe" in message:
-                st.dataframe(message["dataframe"])
-            elif "plot" in message:
-                if isinstance(message["plot"], Image.Image):
-                    st.image(message["plot"])
-                elif isinstance(message["plot"], go.Figure):
-                    st.plotly_chart(message["plot"])
-                elif isinstance(message["plot"], bytes):
-                    try:
-                        image = Image.open(io.BytesIO(message["plot"]))
-                        st.image(image)
-                    except Exception as e:
-                        st.error(f"Error displaying image: {e}")
-                else:
-                    st.write("Unsupported plot format")
-            else:
-                st.write(message["content"])
     user_question = st.chat_input("Ask a question about your documents or data:")
     if user_question:
         handle_userinput(user_question, st.session_state.vectorstore, st.session_state.dfs)
     with st.sidebar:
         st.subheader("Your files")
         uploaded_files = st.file_uploader(
-            "Upload PDFs, CSVs, or Excel files (up to 3)", accept_multiple_files=True, key="file_uploader"
         )
         if st.button("Process"):
@@ -175,6 +247,7 @@ def main():
                 pdf_uploaded = False
                 data_uploaded = False
                 for uploaded_file in uploaded_files:
                     file_extension = uploaded_file.name.split(".")[-1].lower()
@@ -204,6 +277,7 @@ def main():
                             st.error(f"Error reading {uploaded_file.name}: {e}")
                             st.stop()
                 if pdf_docs:
                     raw_text = get_pdf_text(pdf_docs)
                     text_chunks = get_text_chunks(raw_text)
@@ -213,6 +287,7 @@ def main():
                     st.session_state.vectorstore = None
                     st.session_state.conversation = None
                 if dfs:
                     st.session_state.dfs = dfs
                 else:

 import os
 import pandas as pd
 from pandasai import SmartDataframe, SmartDatalake
+from pandasai.responses.response_parser import ResponseParser
 from pandasai.llm import GoogleGemini
 import plotly.graph_objects as go
 from PIL import Image
 import base64
 class StreamLitResponse(ResponseParser):
+    def __init__(self, context):
+        super().__init__(context)
+    def format_dataframe(self, result):
+        """Enhanced DataFrame rendering with type identifier"""
+        return {
+            'type': 'dataframe',
+            'value': result['value']
+        }
+    def format_plot(self, result):
+        """Enhanced plot rendering with type identifier"""
+        try:
+            image = result['value']
+            # Convert image to base64 for consistent storage
+            if isinstance(image, Image.Image):
+                buffered = io.BytesIO()
+                image.save(buffered, format="PNG")
+                base64_image = base64.b64encode(buffered.getvalue()).decode('utf-8')
+            elif isinstance(image, bytes):
+                base64_image = base64.b64encode(image).decode('utf-8')
+            elif isinstance(image, str) and os.path.exists(image):
+                with open(image, "rb") as f:
+                    base64_image = base64.b64encode(f.read()).decode('utf-8')
+            else:
+                return {'type': 'text', 'value': "Unsupported image format"}
+            return {
+                'type': 'plot',
+                'value': base64_image
+            }
+        except Exception as e:
+            return {'type': 'text', 'value': f"Error processing plot: {e}"}
+    def format_other(self, result):
+        """Handle other types of responses"""
+        return {
+            'type': 'text',
+            'value': str(result['value'])
+        }
+# Load environment variables. load_dotenv() is called before the lookup so
+# that values it provides are visible to os.environ (presumably read from a
+# local .env file - confirm against the dotenv import at the top of app.py).
+load_dotenv()
+# .get() returns None rather than raising when the variable is absent, which
+# lets the check below report the problem in the UI.
+GOOGLE_API_KEY = os.environ.get('GOOGLE_API_KEY')
 if not GOOGLE_API_KEY:
+    # Missing or empty key: show the error, then st.stop() so nothing below
+    # this point runs without a key.
     st.error("GOOGLE_API_KEY environment variable not set.")
     st.stop()
 def generateResponse(prompt, dfs):
+    """Generate response using PandasAI"""
     llm = GoogleGemini(api_key=GOOGLE_API_KEY)
+    pandas_agent = SmartDatalake(dfs, config={
+        "llm": llm,
+        "response_parser": StreamLitResponse
+    })
+    return pandas_agent.chat(prompt)
+# Other utility functions remain the same as in the original code
+# (get_pdf_text, get_text_chunks, get_vectorstore, get_conversation_chain)
 # Processing pdfs
 def get_pdf_text(pdf_docs):
     text = ""
     vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
     return vectorstore
 def get_conversation_chain(vectorstore):
     llm = ChatGoogleGenerativeAI(model='gemini-2.0-flash-exp')
     memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
     )
     return conversation_chain
+def render_chat_message(message):
+    """Render different types of chat messages"""
+    if "dataframe" in message:
+        st.dataframe(message["dataframe"])
+    elif "plot" in message:
+        try:
+            # Handle base64 encoded images
+            plot_data = message["plot"]
+            if isinstance(plot_data, str):
+                st.image(f"data:image/png;base64,{plot_data}")
+            elif isinstance(plot_data, Image.Image):
+                st.image(plot_data)
+            elif isinstance(plot_data, go.Figure):
+                st.plotly_chart(plot_data)
+            elif isinstance(plot_data, bytes):
+                image = Image.open(io.BytesIO(plot_data))
+                st.image(image)
+            else:
+                st.write("Unsupported plot format")
+        except Exception as e:
+            st.error(f"Error rendering plot: {e}")
+    # Always render text content
+    if "content" in message:
+        st.markdown(message["content"])
+def handle_userinput(question, pdf_vectorstore, dfs):
+    """Enhanced input handling with robust content processing"""
+    try:
+        if pdf_vectorstore and st.session_state.conversation:
+            # PDF/Vector search mode
+            response = st.session_state.conversation({"question": question})
+            st.session_state.chat_history.append({
+                "role": "user",
+                "content": question
+            })
+            assistant_response = response.get('answer', '')
+            st.session_state.chat_history.append({
+                "role": "assistant",
+                "content": assistant_response
+            })
+        elif dfs:
+            # PandasAI data analysis mode
+            st.session_state.chat_history.append({
+                "role": "user",
+                "content": question
+            })
+            # Generate response with PandasAI
+            result = generateResponse(question, dfs)
+            # Handle different response types
+            if isinstance(result, dict):
+                response_type = result.get('type', 'text')
+                response_value = result.get('value')
+                if response_type == 'dataframe':
+                    st.session_state.chat_history.append({
+                        "role": "assistant",
+                        "content": "Here's the DataFrame analysis:",
+                        "dataframe": response_value
+                    })
+                elif response_type == 'plot':
+                    st.session_state.chat_history.append({
+                        "role": "assistant",
+                        "content": "Here's the visualization:",
+                        "plot": response_value
+                    })
+                else:
+                    st.session_state.chat_history.append({
+                        "role": "assistant",
+                        "content": str(response_value)
+                    })
+            else:
+                st.session_state.chat_history.append({
+                    "role": "assistant",
+                    "content": str(result)
+                })
+        else:
+            st.write("Please upload and process your documents/data first.")
+        st.rerun()
+    except Exception as e:
+        st.error(f"Error processing input: {e}")
 def main():
     st.set_page_config(page_title="Chat with PDFs and Data", page_icon=":books:")
+    # Initialize session state variables
     if "conversation" not in st.session_state:
         st.session_state.conversation = None
     if "chat_history" not in st.session_state:
     st.title("Chat with PDFs and Data :books: :bar_chart:")
+    # Chat display with enhanced rendering
     for message in st.session_state.chat_history:
         with st.chat_message(message["role"]):
+            render_chat_message(message)
+    # Chat input
     user_question = st.chat_input("Ask a question about your documents or data:")
     if user_question:
         handle_userinput(user_question, st.session_state.vectorstore, st.session_state.dfs)
+    # Sidebar for file upload
     with st.sidebar:
         st.subheader("Your files")
         uploaded_files = st.file_uploader(
+            "Upload PDFs, CSVs, or Excel files (up to 3)",
+            accept_multiple_files=True,
+            key="file_uploader"
         )
         if st.button("Process"):
                 pdf_uploaded = False
                 data_uploaded = False
+                # File processing logic remains the same as in the original code
                 for uploaded_file in uploaded_files:
                     file_extension = uploaded_file.name.split(".")[-1].lower()
                             st.error(f"Error reading {uploaded_file.name}: {e}")
                             st.stop()
+                # Set up vectorstore and conversation chain for PDFs
                 if pdf_docs:
                     raw_text = get_pdf_text(pdf_docs)
                     text_chunks = get_text_chunks(raw_text)
                     st.session_state.vectorstore = None
                     st.session_state.conversation = None
+                # Set up DataFrames for PandasAI
                 if dfs:
                     st.session_state.dfs = dfs
                 else: