Spaces:

Kathirsci
/

Report_summarizer

Sleeping

App Files Community

Kathirsci committed on Sep 23, 2024

Commit

3f068be

verified ·

1 Parent(s): 895f085

Update app.py

Browse files

Files changed (1) hide show

app.py +8 -16

app.py CHANGED Viewed

@@ -2,11 +2,10 @@ import streamlit as st
 import tempfile
 import logging
 from typing import List
-from langchain_community.document_loaders import PyPDFLoader
-#from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain_huggingface import HuggingFaceEmbeddings
-from langchain_community.vectorstores import FAISS
-from langchain_community.llms import HuggingFacePipeline
 from langchain.chains.summarize import load_summarize_chain
 from langchain.schema import Document
 from langchain.text_splitter import RecursiveCharacterTextSplitter
@@ -35,7 +34,7 @@ def load_embeddings():
 def load_llm(model_name):
     """Load and cache the language model."""
     try:
-        pipe = pipeline("text2text-generation", model=model_name, max_length=1024)
         return HuggingFacePipeline(pipeline=pipe)
     except Exception as e:
         logger.error(f"Failed to load LLM: {e}")
@@ -48,7 +47,7 @@ def process_pdf(file) -> List[Document]:
         with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
             temp_file.write(file.getvalue())
             temp_file_path = temp_file.name
         loader = PyPDFLoader(file_path=temp_file_path)
         pages = loader.load()
         text_splitter = RecursiveCharacterTextSplitter(chunk_size=4000, chunk_overlap=200)
@@ -73,7 +72,6 @@ def summarize_report(documents: List[Document], llm) -> str:
     try:
         prompt_template = """
         <s>[INST] You are an advanced AI assistant with expertise in summarizing technical documents. Your goal is to create a clear, concise, and well-organized summary using Markdown formatting. Focus on extracting and presenting the essential points of the document effectively.
     *Instructions:*
     - Analyze the provided context and input carefully.
     - Identify and highlight the key points, main arguments, and important details.
@@ -82,30 +80,24 @@ def summarize_report(documents: List[Document], llm) -> str:
         - Use **text** for important terms or concepts.
         - Provide a brief introduction, followed by the main points, and a concluding summary if applicable.
     - Ensure the summary is easy to read and understand, avoiding unnecessary jargon.
     *Example Summary Format:*
     # Overview
     *Document Title:* Technical Analysis Report
     *Summary:*
     The report provides an in-depth analysis of the recent technical advancements in AI. It covers key areas such as ...
     # Key Findings
     - *Finding 1:* Description of finding 1.
     - *Finding 2:* Description of finding 2.
     # Conclusion
     The analysis highlights the significant advancements and future directions for AI technology.
     *Your Response:* [/INST]</s> {input}
     Context: {context}
     """
         prompt = PromptTemplate.from_template(prompt_template)
         chain = load_summarize_chain(llm, chain_type="stuff", prompt=prompt)
-        summary = chain.invoke(documents)
-        return summary['output_text']
     except Exception as e:
         logger.error(f"Error summarizing report: {e}")
@@ -114,7 +106,7 @@ def summarize_report(documents: List[Document], llm) -> str:
 def main():
     st.title("Report Summarizer")
     model_option = st.sidebar.text_input("Enter model name", value=DEFAULT_MODEL)
     uploaded_file = st.sidebar.file_uploader("Upload your Report", type="pdf")

 import tempfile
 import logging
 from typing import List
+from langchain.document_loaders import PyPDFLoader
 from langchain_huggingface import HuggingFaceEmbeddings
+from langchain.vectorstores import FAISS
+from langchain.llms import HuggingFacePipeline
 from langchain.chains.summarize import load_summarize_chain
 from langchain.schema import Document
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 def load_llm(model_name):
     """Load and cache the language model."""
     try:
+        pipe = pipeline("text-generation", model=model_name, max_length=1024)
         return HuggingFacePipeline(pipeline=pipe)
     except Exception as e:
         logger.error(f"Failed to load LLM: {e}")
         with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
             temp_file.write(file.getvalue())
             temp_file_path = temp_file.name
         loader = PyPDFLoader(file_path=temp_file_path)
         pages = loader.load()
         text_splitter = RecursiveCharacterTextSplitter(chunk_size=4000, chunk_overlap=200)
     try:
         prompt_template = """
         <s>[INST] You are an advanced AI assistant with expertise in summarizing technical documents. Your goal is to create a clear, concise, and well-organized summary using Markdown formatting. Focus on extracting and presenting the essential points of the document effectively.
     *Instructions:*
     - Analyze the provided context and input carefully.
     - Identify and highlight the key points, main arguments, and important details.
         - Use **text** for important terms or concepts.
         - Provide a brief introduction, followed by the main points, and a concluding summary if applicable.
     - Ensure the summary is easy to read and understand, avoiding unnecessary jargon.
     *Example Summary Format:*
     # Overview
     *Document Title:* Technical Analysis Report
     *Summary:*
     The report provides an in-depth analysis of the recent technical advancements in AI. It covers key areas such as ...
     # Key Findings
     - *Finding 1:* Description of finding 1.
     - *Finding 2:* Description of finding 2.
     # Conclusion
     The analysis highlights the significant advancements and future directions for AI technology.
     *Your Response:* [/INST]</s> {input}
     Context: {context}
     """
         prompt = PromptTemplate.from_template(prompt_template)
         chain = load_summarize_chain(llm, chain_type="stuff", prompt=prompt)
+        summary = chain.run(documents)
+        return summary
     except Exception as e:
         logger.error(f"Error summarizing report: {e}")
 def main():
     st.title("Report Summarizer")
     model_option = st.sidebar.text_input("Enter model name", value=DEFAULT_MODEL)
     uploaded_file = st.sidebar.file_uploader("Upload your Report", type="pdf")