Upload folder using huggingface_hub
Browse files
- src/__pycache__/doc_loading.cpython-39.pyc +0 -0
- src/__pycache__/gradio_app.cpython-310.pyc +0 -0
- src/__pycache__/gradio_app.cpython-39.pyc +0 -0
- src/__pycache__/legal_implications.cpython-39.pyc +0 -0
- src/__pycache__/llm_utils.cpython-39.pyc +0 -0
- src/__pycache__/mailing.cpython-39.pyc +0 -0
- src/__pycache__/model_loading.cpython-39.pyc +0 -0
- src/__pycache__/prompts.cpython-39.pyc +0 -0
- src/__pycache__/summarization.cpython-39.pyc +0 -0
- src/doc_loading.py +9 -0
- src/llm_utils.py +9 -2
- src/summarization.py +1 -1
src/__pycache__/doc_loading.cpython-39.pyc
CHANGED
|
Binary files a/src/__pycache__/doc_loading.cpython-39.pyc and b/src/__pycache__/doc_loading.cpython-39.pyc differ
|
|
|
src/__pycache__/gradio_app.cpython-310.pyc
ADDED
|
Binary file (10.5 kB). View file
|
|
|
src/__pycache__/gradio_app.cpython-39.pyc
CHANGED
|
Binary files a/src/__pycache__/gradio_app.cpython-39.pyc and b/src/__pycache__/gradio_app.cpython-39.pyc differ
|
|
|
src/__pycache__/legal_implications.cpython-39.pyc
CHANGED
|
Binary files a/src/__pycache__/legal_implications.cpython-39.pyc and b/src/__pycache__/legal_implications.cpython-39.pyc differ
|
|
|
src/__pycache__/llm_utils.cpython-39.pyc
CHANGED
|
Binary files a/src/__pycache__/llm_utils.cpython-39.pyc and b/src/__pycache__/llm_utils.cpython-39.pyc differ
|
|
|
src/__pycache__/mailing.cpython-39.pyc
CHANGED
|
Binary files a/src/__pycache__/mailing.cpython-39.pyc and b/src/__pycache__/mailing.cpython-39.pyc differ
|
|
|
src/__pycache__/model_loading.cpython-39.pyc
CHANGED
|
Binary files a/src/__pycache__/model_loading.cpython-39.pyc and b/src/__pycache__/model_loading.cpython-39.pyc differ
|
|
|
src/__pycache__/prompts.cpython-39.pyc
CHANGED
|
Binary files a/src/__pycache__/prompts.cpython-39.pyc and b/src/__pycache__/prompts.cpython-39.pyc differ
|
|
|
src/__pycache__/summarization.cpython-39.pyc
CHANGED
|
Binary files a/src/__pycache__/summarization.cpython-39.pyc and b/src/__pycache__/summarization.cpython-39.pyc differ
|
|
|
src/doc_loading.py
CHANGED
|
@@ -1,6 +1,9 @@
|
|
| 1 |
from langchain.document_loaders import PyPDFLoader, TextLoader
|
| 2 |
from langchain.docstore.document import Document
|
| 3 |
from typing import List
|
|
|
|
|
|
|
|
|
|
| 4 |
|
| 5 |
|
| 6 |
def load_docs(file_path: str, with_pageinfo: bool = True) -> List[Document]:
|
|
@@ -17,8 +20,14 @@ def load_docs(file_path: str, with_pageinfo: bool = True) -> List[Document]:
|
|
| 17 |
List[Document]: List of documents.
|
| 18 |
"""
|
| 19 |
if file_path.endswith(".pdf"):
|
|
|
|
| 20 |
loader = PyPDFLoader(file_path)
|
| 21 |
docs = loader.load()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
elif file_path.endswith(".txt"):
|
| 23 |
loader = TextLoader(file_path)
|
| 24 |
docs = loader.load()
|
|
|
|
| 1 |
from langchain.document_loaders import PyPDFLoader, TextLoader
|
| 2 |
from langchain.docstore.document import Document
|
| 3 |
from typing import List
|
| 4 |
+
from langchain.text_splitter import (
|
| 5 |
+
RecursiveCharacterTextSplitter,
|
| 6 |
+
)
|
| 7 |
|
| 8 |
|
| 9 |
def load_docs(file_path: str, with_pageinfo: bool = True) -> List[Document]:
|
|
|
|
| 20 |
List[Document]: List of documents.
|
| 21 |
"""
|
| 22 |
if file_path.endswith(".pdf"):
|
| 23 |
+
# load documents
|
| 24 |
loader = PyPDFLoader(file_path)
|
| 25 |
docs = loader.load()
|
| 26 |
+
# # split documents
|
| 27 |
+
# text_splitter = RecursiveCharacterTextSplitter(
|
| 28 |
+
# chunk_size=1000, chunk_overlap=150
|
| 29 |
+
# )
|
| 30 |
+
# docs = text_splitter.split_documents(docs)
|
| 31 |
elif file_path.endswith(".txt"):
|
| 32 |
loader = TextLoader(file_path)
|
| 33 |
docs = loader.load()
|
src/llm_utils.py
CHANGED
|
@@ -20,11 +20,18 @@ async def async_generate_llmchain(
|
|
| 20 |
Returns:
|
| 21 |
dict: Dictionary with the summarization.
|
| 22 |
"""
|
|
|
|
| 23 |
print(f"Starting summarization for {k}")
|
| 24 |
now = time.time()
|
| 25 |
-
chain = LLMChain(llm=llm, **llm_kwargs)
|
| 26 |
|
| 27 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
print(f"Time taken for {k}: ", time.time() - now)
|
| 29 |
return {k: resp}
|
| 30 |
|
|
|
|
| 20 |
Returns:
|
| 21 |
dict: Dictionary with the summarization.
|
| 22 |
"""
|
| 23 |
+
|
| 24 |
print(f"Starting summarization for {k}")
|
| 25 |
now = time.time()
|
|
|
|
| 26 |
|
| 27 |
+
# chain = LLMChain(llm=llm, **llm_kwargs)
|
| 28 |
+
chain = load_summarize_chain(
|
| 29 |
+
llm=llm,
|
| 30 |
+
**llm_kwargs,
|
| 31 |
+
)
|
| 32 |
+
|
| 33 |
+
# resp = await chain.arun(text=docs)
|
| 34 |
+
resp = await chain.arun(docs)
|
| 35 |
print(f"Time taken for {k}: ", time.time() - now)
|
| 36 |
return {k: resp}
|
| 37 |
|
src/summarization.py
CHANGED
|
@@ -5,7 +5,7 @@ from src.prompts import (
|
|
| 5 |
prompts_parallel_summary,
|
| 6 |
)
|
| 7 |
from src.doc_loading import load_docs
|
| 8 |
-
from src.llm_utils import async_generate_llmchain
|
| 9 |
import time
|
| 10 |
from typing import Dict, List
|
| 11 |
import asyncio
|
|
|
|
| 5 |
prompts_parallel_summary,
|
| 6 |
)
|
| 7 |
from src.doc_loading import load_docs
|
| 8 |
+
from src.llm_utils import async_generate_llmchain, async_generate_summary_chain
|
| 9 |
import time
|
| 10 |
from typing import Dict, List
|
| 11 |
import asyncio
|