fvde committed on
Commit
229f176
·
1 Parent(s): 84e6019

Upload folder using huggingface_hub

Browse files
src/__pycache__/doc_loading.cpython-39.pyc CHANGED
Binary files a/src/__pycache__/doc_loading.cpython-39.pyc and b/src/__pycache__/doc_loading.cpython-39.pyc differ
 
src/__pycache__/gradio_app.cpython-310.pyc ADDED
Binary file (10.5 kB). View file
 
src/__pycache__/gradio_app.cpython-39.pyc CHANGED
Binary files a/src/__pycache__/gradio_app.cpython-39.pyc and b/src/__pycache__/gradio_app.cpython-39.pyc differ
 
src/__pycache__/legal_implications.cpython-39.pyc CHANGED
Binary files a/src/__pycache__/legal_implications.cpython-39.pyc and b/src/__pycache__/legal_implications.cpython-39.pyc differ
 
src/__pycache__/llm_utils.cpython-39.pyc CHANGED
Binary files a/src/__pycache__/llm_utils.cpython-39.pyc and b/src/__pycache__/llm_utils.cpython-39.pyc differ
 
src/__pycache__/mailing.cpython-39.pyc CHANGED
Binary files a/src/__pycache__/mailing.cpython-39.pyc and b/src/__pycache__/mailing.cpython-39.pyc differ
 
src/__pycache__/model_loading.cpython-39.pyc CHANGED
Binary files a/src/__pycache__/model_loading.cpython-39.pyc and b/src/__pycache__/model_loading.cpython-39.pyc differ
 
src/__pycache__/prompts.cpython-39.pyc CHANGED
Binary files a/src/__pycache__/prompts.cpython-39.pyc and b/src/__pycache__/prompts.cpython-39.pyc differ
 
src/__pycache__/summarization.cpython-39.pyc CHANGED
Binary files a/src/__pycache__/summarization.cpython-39.pyc and b/src/__pycache__/summarization.cpython-39.pyc differ
 
src/doc_loading.py CHANGED
@@ -1,6 +1,9 @@
1
  from langchain.document_loaders import PyPDFLoader, TextLoader
2
  from langchain.docstore.document import Document
3
  from typing import List
 
 
 
4
 
5
 
6
  def load_docs(file_path: str, with_pageinfo: bool = True) -> List[Document]:
@@ -17,8 +20,14 @@ def load_docs(file_path: str, with_pageinfo: bool = True) -> List[Document]:
17
  List[Document]: List of documents.
18
  """
19
  if file_path.endswith(".pdf"):
 
20
  loader = PyPDFLoader(file_path)
21
  docs = loader.load()
 
 
 
 
 
22
  elif file_path.endswith(".txt"):
23
  loader = TextLoader(file_path)
24
  docs = loader.load()
 
1
  from langchain.document_loaders import PyPDFLoader, TextLoader
2
  from langchain.docstore.document import Document
3
  from typing import List
4
+ from langchain.text_splitter import (
5
+ RecursiveCharacterTextSplitter,
6
+ )
7
 
8
 
9
  def load_docs(file_path: str, with_pageinfo: bool = True) -> List[Document]:
 
20
  List[Document]: List of documents.
21
  """
22
  if file_path.endswith(".pdf"):
23
+ # load documents
24
  loader = PyPDFLoader(file_path)
25
  docs = loader.load()
26
+ # # split documents
27
+ # text_splitter = RecursiveCharacterTextSplitter(
28
+ # chunk_size=1000, chunk_overlap=150
29
+ # )
30
+ # docs = text_splitter.split_documents(docs)
31
  elif file_path.endswith(".txt"):
32
  loader = TextLoader(file_path)
33
  docs = loader.load()
src/llm_utils.py CHANGED
@@ -20,11 +20,18 @@ async def async_generate_llmchain(
20
  Returns:
21
  dict: Dictionary with the summarization.
22
  """
 
23
  print(f"Starting summarization for {k}")
24
  now = time.time()
25
- chain = LLMChain(llm=llm, **llm_kwargs)
26
 
27
- resp = await chain.arun(text=docs)
 
 
 
 
 
 
 
28
  print(f"Time taken for {k}: ", time.time() - now)
29
  return {k: resp}
30
 
 
20
  Returns:
21
  dict: Dictionary with the summarization.
22
  """
23
+
24
  print(f"Starting summarization for {k}")
25
  now = time.time()
 
26
 
27
+ # chain = LLMChain(llm=llm, **llm_kwargs)
28
+ chain = load_summarize_chain(
29
+ llm=llm,
30
+ **llm_kwargs,
31
+ )
32
+
33
+ # resp = await chain.arun(text=docs)
34
+ resp = await chain.arun(docs)
35
  print(f"Time taken for {k}: ", time.time() - now)
36
  return {k: resp}
37
 
src/summarization.py CHANGED
@@ -5,7 +5,7 @@ from src.prompts import (
5
  prompts_parallel_summary,
6
  )
7
  from src.doc_loading import load_docs
8
- from src.llm_utils import async_generate_llmchain
9
  import time
10
  from typing import Dict, List
11
  import asyncio
 
5
  prompts_parallel_summary,
6
  )
7
  from src.doc_loading import load_docs
8
+ from src.llm_utils import async_generate_llmchain, async_generate_summary_chain
9
  import time
10
  from typing import Dict, List
11
  import asyncio