Spaces:

rchrdgwr
/

CoolApp

Sleeping

App Files Community

rchrdgwr committed on Aug 26, 2024

Commit

aba4ee7

1 Parent(s): 0fbd1a9

file clean up

Browse files

Files changed (2) hide show

app.py +3 -1
richard/text_utils.py +5 -7

app.py CHANGED Viewed

@@ -14,6 +14,7 @@ from richard.text_utils import FileLoader
 from richard.pipeline import RetrievalAugmentedQAPipeline
 # from richard.vector_database import QdrantDatabase
 from qdrant_client import QdrantClient
 def process_file(file, use_rct):
     fileLoader = FileLoader()
@@ -37,6 +38,7 @@ user_role_prompt = UserRolePrompt(user_prompt_template)
 @cl.on_chat_start
 async def on_chat_start():
     res = await cl.AskActionMessage(
         content="Do you want to use Qdrant?",
         actions=[
@@ -98,7 +100,7 @@ async def on_chat_start():
     await msg.send()
     # decide if to use the dict vector store of the Qdrant vector store
-    from qdrant_client.models import PointStruct, VectorParams
     # Create a dict vector store
     if use_qdrant == False:
         vector_db = VectorDatabase()

 from richard.pipeline import RetrievalAugmentedQAPipeline
 # from richard.vector_database import QdrantDatabase
 from qdrant_client import QdrantClient
+from qdrant_client.models import VectorParams
 def process_file(file, use_rct):
     fileLoader = FileLoader()
 @cl.on_chat_start
 async def on_chat_start():
+    # get user inputs
     res = await cl.AskActionMessage(
         content="Do you want to use Qdrant?",
         actions=[
     await msg.send()
     # decide if to use the dict vector store of the Qdrant vector store
     # Create a dict vector store
     if use_qdrant == False:
         vector_db = VectorDatabase()

richard/text_utils.py CHANGED Viewed

@@ -1,13 +1,12 @@
 import os
 from typing import List
-import fitz
 import tempfile
 from aimakerspace.text_utils import CharacterTextSplitter
 from langchain_text_splitters import RecursiveCharacterTextSplitter
 # load the file
 class FileLoader:
@@ -23,6 +22,7 @@ class FileLoader:
         else:
             text_splitter=CharacterTextSplitter()
         file_extension = os.path.splitext(file.name)[1].lower()
         with tempfile.NamedTemporaryFile(mode="wb", delete=False, suffix=file_extension) as temp_file:
             self.temp_file_path = temp_file.name
             temp_file.write(file.content)
@@ -36,22 +36,19 @@ class FileLoader:
                 raise ValueError(
                     f"Unsupported file type: {self.temp_file_path}"
                 )
-            print(self.documents)
             return text_splitter.split_text(self.documents)
         else:
             raise ValueError(
                     "Not a file"
                 )
     def load_text_file(self):
         with open(self.temp_file_path, "r", encoding=self.encoding) as f:
             self.documents.append(f.read())
     def load_pdf_file(self):
-        print("load_pdf_file()")
         pdf_document = fitz.open(self.temp_file_path)
-        print(len(pdf_document))
         for page_num in range(len(pdf_document)):
             page = pdf_document.load_page(page_num)
             text = page.get_text()
@@ -85,6 +82,7 @@ class CharacterTextSplitter:
 class MyRecursiveCharacterTextSplitter:
     def __init__(
         self
     ):

 import os
 from typing import List
+import fitz # pymupdf
 import tempfile
 from aimakerspace.text_utils import CharacterTextSplitter
 from langchain_text_splitters import RecursiveCharacterTextSplitter
 # load the file
+# handle .txt and .pdf
 class FileLoader:
         else:
             text_splitter=CharacterTextSplitter()
         file_extension = os.path.splitext(file.name)[1].lower()
         with tempfile.NamedTemporaryFile(mode="wb", delete=False, suffix=file_extension) as temp_file:
             self.temp_file_path = temp_file.name
             temp_file.write(file.content)
                 raise ValueError(
                     f"Unsupported file type: {self.temp_file_path}"
                 )
             return text_splitter.split_text(self.documents)
         else:
             raise ValueError(
                     "Not a file"
                 )
     def load_text_file(self):
         with open(self.temp_file_path, "r", encoding=self.encoding) as f:
             self.documents.append(f.read())
     def load_pdf_file(self):
+        # pymupdf
         pdf_document = fitz.open(self.temp_file_path)
         for page_num in range(len(pdf_document)):
             page = pdf_document.load_page(page_num)
             text = page.get_text()
 class MyRecursiveCharacterTextSplitter:
+    # uses langChain.RecursiveCharacterTextSplitter
     def __init__(
         self
     ):