Spaces:

Vashishta-S-2141
/

LLM_Powered_Database_Chatbot

Sleeping

App Files Files Community

SVashishta1 committed on Mar 11, 2025

Commit

6950cd1

1 Parent(s): a610301

Initial commit

Browse files

Files changed (4) hide show

app.py +5 -5
backend/db.py +3 -3
backend/document_parser.py +10 -10
backend/vector_db.py +5 -5

app.py CHANGED Viewed

@@ -456,7 +456,7 @@ def clear_context():
     except Exception as e:
         return [{"role": "assistant", "content": f"Error clearing context: {str(e)}"}]
-# I am making a function for voice input but we are not using it
 """
 def process_voice_input(audio_path):
     # I am checking if there is audio
@@ -484,7 +484,7 @@ def process_voice_input(audio_path):
         return f"Error processing audio: {str(e)}"
 """
-# I am making a function for text to speech but we are not using it
 """
 def text_to_speech_output(text):
     # I am checking if there is text
@@ -867,7 +867,7 @@ with gr.Blocks(title="AI Document Analysis & Voice Assistant") as demo:
         clear_btn.click(lambda: None, None, [chatbot], queue=False)
         clear_context_btn.click(clear_context, inputs=[], outputs=[chatbot])
-        # I am commenting out voice button click because we are not using it
         """
         voice_btn.click(
             lambda: gr.update(visible=True),
@@ -876,7 +876,7 @@ with gr.Blocks(title="AI Document Analysis & Voice Assistant") as demo:
         )
         """
-        # I am commenting out voice input change because we are not using it
         """
         voice_input.change(
             process_voice_input,
@@ -885,7 +885,7 @@ with gr.Blocks(title="AI Document Analysis & Voice Assistant") as demo:
         )
         """
-        # I am commenting out TTS button because we are not using it
         """
         tts_btn = gr.Button("🔊 Speak Response")
         tts_btn.click(

     except Exception as e:
         return [{"role": "assistant", "content": f"Error clearing context: {str(e)}"}]
+# I am making a function for voice input but we are not using it in this version(still in development phase)
 """
 def process_voice_input(audio_path):
     # I am checking if there is audio
         return f"Error processing audio: {str(e)}"
 """
+#  a function for text to speech
 """
 def text_to_speech_output(text):
     # I am checking if there is text
         clear_btn.click(lambda: None, None, [chatbot], queue=False)
         clear_context_btn.click(clear_context, inputs=[], outputs=[chatbot])
+        # I am commenting out voice button click because it is still in development phase
         """
         voice_btn.click(
             lambda: gr.update(visible=True),
         )
         """
+        # I am commenting out voice input change because it is still in development phase
         """
         voice_input.change(
             process_voice_input,
         )
         """
+        # I am commenting out TTS button because it is still in development phase
         """
         tts_btn = gr.Button("🔊 Speak Response")
         tts_btn.click(

backend/db.py CHANGED Viewed

@@ -28,7 +28,7 @@ class SimpleDB:
         """Add a document to the database"""
         db = self._read_db()
-        # Generate a simple ID
         doc_id = len(db["documents"]) + 1
         # Add document
@@ -60,10 +60,10 @@ class SimpleDB:
         """Log a user query and its response"""
         db = self._read_db()
-        # Generate a simple ID
         query_id = len(db["queries"]) + 1
-        # Add query
         db["queries"].append({
             "id": query_id,
             "query_text": query_text,

         """Add a document to the database"""
         db = self._read_db()
+        # Generating a simple ID
         doc_id = len(db["documents"]) + 1
         # Add document
         """Log a user query and its response"""
         db = self._read_db()
+        # Generating a simple ID
         query_id = len(db["queries"]) + 1
+        # Adding query
         db["queries"].append({
             "id": query_id,
             "query_text": query_text,

backend/document_parser.py CHANGED Viewed

@@ -22,17 +22,17 @@ class SimpleDocumentParser:
         elif file_ext in ['.csv', '.xlsx', '.xls']:
             return self.parse_tabular(file_path)
         else:
-            # Default to text parsing
             return self.parse_text(file_path)
     def parse_pdf(self, file_path: str) -> List[str]:
         """Parse PDF using PyMuPDF"""
         chunks = []
         try:
-            # Open the PDF
             doc = fitz.open(file_path)
-            # Extract text from each page
             for page_num in range(len(doc)):
                 page = doc.load_page(page_num)
                 text = page.get_text()
@@ -57,7 +57,7 @@ class SimpleDocumentParser:
             with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                 text = f.read()
-            # Split by paragraphs
             paragraphs = text.split('\n\n')
             for para in paragraphs:
                 if len(para.strip()) > 0:
@@ -74,7 +74,7 @@ class SimpleDocumentParser:
         try:
             doc = docx.Document(file_path)
-            # Extract text from paragraphs
             for para in doc.paragraphs:
                 if len(para.text.strip()) > 0:
                     chunks.append(para.text.strip())
@@ -85,7 +85,7 @@ class SimpleDocumentParser:
         return chunks
     def parse_tabular(self, file_path: str) -> List[str]:
-        """Parse CSV or Excel files using pandas"""
         chunks = []
         try:
             file_ext = os.path.splitext(file_path)[1].lower()
@@ -95,22 +95,22 @@ class SimpleDocumentParser:
             else:  # Excel files
                 df = pd.read_excel(file_path)
-            # Add table summary
             summary = f"Table with {len(df)} rows and {len(df.columns)} columns. "
             summary += f"Columns: {', '.join(df.columns.tolist())}"
             chunks.append(summary)
-            # Add column descriptions with data types
             col_types = df.dtypes.to_dict()
             col_desc = "Column details:\n"
             for col, dtype in col_types.items():
-                # Add sample values for each column (first 3 unique values)
                 sample_values = df[col].dropna().unique()[:3]
                 sample_str = ", ".join([str(v) for v in sample_values])
                 col_desc += f"- {col} (Type: {dtype}): Sample values: {sample_str}\n"
             chunks.append(col_desc)
-            # Convert each row to a text chunk (limit to first 50 rows for indexing)
             for index, row in df.head(50).iterrows():
                 row_text = " | ".join([f"{col}: {val}" for col, val in row.items()])
                 chunks.append(row_text)

         elif file_ext in ['.csv', '.xlsx', '.xls']:
             return self.parse_tabular(file_path)
         else:
             return self.parse_text(file_path)
     def parse_pdf(self, file_path: str) -> List[str]:
         """Parse PDF using PyMuPDF"""
         chunks = []
         try:
+            # Opening the PDF
             doc = fitz.open(file_path)
+            # Extracting text from each page
             for page_num in range(len(doc)):
                 page = doc.load_page(page_num)
                 text = page.get_text()
             with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                 text = f.read()
+            # Splitting by paragraphs
             paragraphs = text.split('\n\n')
             for para in paragraphs:
                 if len(para.strip()) > 0:
         try:
             doc = docx.Document(file_path)
+            # Extracting text from paragraphs
             for para in doc.paragraphs:
                 if len(para.text.strip()) > 0:
                     chunks.append(para.text.strip())
         return chunks
     def parse_tabular(self, file_path: str) -> List[str]:
+        """Parsing CSV or Excel files using pandas"""
         chunks = []
         try:
             file_ext = os.path.splitext(file_path)[1].lower()
             else:  # Excel files
                 df = pd.read_excel(file_path)
+            # Adding table summary
             summary = f"Table with {len(df)} rows and {len(df.columns)} columns. "
             summary += f"Columns: {', '.join(df.columns.tolist())}"
             chunks.append(summary)
+            # Adding column descriptions with data types
             col_types = df.dtypes.to_dict()
             col_desc = "Column details:\n"
             for col, dtype in col_types.items():
+                # Adding sample values for each column (first 3 unique values)
                 sample_values = df[col].dropna().unique()[:3]
                 sample_str = ", ".join([str(v) for v in sample_values])
                 col_desc += f"- {col} (Type: {dtype}): Sample values: {sample_str}\n"
             chunks.append(col_desc)
+            # Converting each row to a text chunk (limit to first 50 rows for indexing)
             for index, row in df.head(50).iterrows():
                 row_text = " | ".join([f"{col}: {val}" for col, val in row.items()])
                 chunks.append(row_text)

backend/vector_db.py CHANGED Viewed

@@ -42,8 +42,8 @@ class ChromaVectorDB:
         return results
     def delete_document(self, file_path: str):
-        """Delete all chunks from a specific document"""
-        # Get all IDs related to this document
         results = self.collection.get(
             where={"source": file_path}
         )
@@ -54,11 +54,11 @@ class ChromaVectorDB:
     def reset_collection(self):
         """Reset the collection by clearing all documents"""
         try:
-            # Get all document IDs
             try:
                 all_ids = self.collection.get()["ids"]
                 if all_ids:
-                    # Delete all documents
                     self.collection.delete(ids=all_ids)
                     print(f"Deleted {len(all_ids)} documents from collection")
                 else:
@@ -67,7 +67,7 @@ class ChromaVectorDB:
             except Exception as e:
                 print(f"Error getting or deleting documents: {str(e)}")
-                # Try recreating the collection as a fallback
                 try:
                     self.client.delete_collection("documents")
                     self.collection = self.client.get_or_create_collection("documents")

         return results
     def delete_document(self, file_path: str):
+        """Deleting all chunks from a specific document"""
+        # Getting all IDs related to this document
         results = self.collection.get(
             where={"source": file_path}
         )
     def reset_collection(self):
         """Reset the collection by clearing all documents"""
         try:
+            # Getting all document IDs
             try:
                 all_ids = self.collection.get()["ids"]
                 if all_ids:
+                    # Deleting all documents
                     self.collection.delete(ids=all_ids)
                     print(f"Deleted {len(all_ids)} documents from collection")
                 else:
             except Exception as e:
                 print(f"Error getting or deleting documents: {str(e)}")
+                # Trying to recreate the collection as a fallback
                 try:
                     self.client.delete_collection("documents")
                     self.collection = self.client.get_or_create_collection("documents")