Upload folder using huggingface_hub

Files changed (12)

src/__pycache__/doc_loading.cpython-39.pyc CHANGED Viewed

Binary files a/src/__pycache__/doc_loading.cpython-39.pyc and b/src/__pycache__/doc_loading.cpython-39.pyc differ

src/__pycache__/gradio_app.cpython-310.pyc ADDED Viewed

Binary file (10.5 kB). View file

src/__pycache__/gradio_app.cpython-39.pyc CHANGED Viewed

Binary files a/src/__pycache__/gradio_app.cpython-39.pyc and b/src/__pycache__/gradio_app.cpython-39.pyc differ

src/__pycache__/legal_implications.cpython-39.pyc CHANGED Viewed

Binary files a/src/__pycache__/legal_implications.cpython-39.pyc and b/src/__pycache__/legal_implications.cpython-39.pyc differ

src/__pycache__/llm_utils.cpython-39.pyc CHANGED Viewed

Binary files a/src/__pycache__/llm_utils.cpython-39.pyc and b/src/__pycache__/llm_utils.cpython-39.pyc differ

src/__pycache__/mailing.cpython-39.pyc CHANGED Viewed

Binary files a/src/__pycache__/mailing.cpython-39.pyc and b/src/__pycache__/mailing.cpython-39.pyc differ

src/__pycache__/model_loading.cpython-39.pyc CHANGED Viewed

Binary files a/src/__pycache__/model_loading.cpython-39.pyc and b/src/__pycache__/model_loading.cpython-39.pyc differ

src/__pycache__/prompts.cpython-39.pyc CHANGED Viewed

Binary files a/src/__pycache__/prompts.cpython-39.pyc and b/src/__pycache__/prompts.cpython-39.pyc differ

src/__pycache__/summarization.cpython-39.pyc CHANGED Viewed

Binary files a/src/__pycache__/summarization.cpython-39.pyc and b/src/__pycache__/summarization.cpython-39.pyc differ

src/doc_loading.py CHANGED Viewed

@@ -1,6 +1,9 @@
 from langchain.document_loaders import PyPDFLoader, TextLoader
 from langchain.docstore.document import Document
 from typing import List
 def load_docs(file_path: str, with_pageinfo: bool = True) -> List[Document]:
@@ -17,8 +20,14 @@ def load_docs(file_path: str, with_pageinfo: bool = True) -> List[Document]:
         List[Document]: List of documents.
     """
     if file_path.endswith(".pdf"):
         loader = PyPDFLoader(file_path)
         docs = loader.load()
     elif file_path.endswith(".txt"):
         loader = TextLoader(file_path)
         docs = loader.load()

 from langchain.document_loaders import PyPDFLoader, TextLoader
 from langchain.docstore.document import Document
 from typing import List
+from langchain.text_splitter import (
+    RecursiveCharacterTextSplitter,
+)
 def load_docs(file_path: str, with_pageinfo: bool = True) -> List[Document]:
         List[Document]: List of documents.
     """
     if file_path.endswith(".pdf"):
+        # load documents
         loader = PyPDFLoader(file_path)
         docs = loader.load()
+        # # split documents
+        # text_splitter = RecursiveCharacterTextSplitter(
+        #     chunk_size=1000, chunk_overlap=150
+        # )
+        # docs = text_splitter.split_documents(docs)
     elif file_path.endswith(".txt"):
         loader = TextLoader(file_path)
         docs = loader.load()

src/llm_utils.py CHANGED Viewed

@@ -20,11 +20,18 @@ async def async_generate_llmchain(
     Returns:
         dict: Dictionary with the summarization.
     """
     print(f"Starting summarization for {k}")
     now = time.time()
-    chain = LLMChain(llm=llm, **llm_kwargs)
-    resp = await chain.arun(text=docs)
     print(f"Time taken for {k}: ", time.time() - now)
     return {k: resp}

     Returns:
         dict: Dictionary with the summarization.
     """
     print(f"Starting summarization for {k}")
     now = time.time()
+    # chain = LLMChain(llm=llm, **llm_kwargs)
+    chain = load_summarize_chain(
+        llm=llm,
+        **llm_kwargs,
+    )
+    # resp = await chain.arun(text=docs)
+    resp = await chain.arun(docs)
     print(f"Time taken for {k}: ", time.time() - now)
     return {k: resp}

src/summarization.py CHANGED Viewed

@@ -5,7 +5,7 @@ from src.prompts import (
     prompts_parallel_summary,
 )
 from src.doc_loading import load_docs
-from src.llm_utils import async_generate_llmchain
 import time
 from typing import Dict, List
 import asyncio

     prompts_parallel_summary,
 )
 from src.doc_loading import load_docs
+from src.llm_utils import async_generate_llmchain, async_generate_summary_chain
 import time
 from typing import Dict, List
 import asyncio