Spaces:

girishwangikar
/

PDF_And_Text_Summarizer

Sleeping

App Files Files Community

girishwangikar committed on Aug 28, 2024

Commit

a914df4

verified ·

1 Parent(s): 10be217

Update app.py

Browse files

Files changed (1) hide show

app.py +52 -46

app.py CHANGED Viewed

@@ -12,6 +12,7 @@ groq_api_key = os.environ.get('GROQ_API_KEY')
 # Set up LLM
 # groq_api_key is read with os.environ.get('GROQ_API_KEY') (see the hunk
 # header line above), so it is None when that variable is not set.
 # NOTE(review): no check for a missing key is visible here - confirm how
 # ChatGroq behaves when groq_api_key is None.
 llm = ChatGroq(temperature=0, model_name='llama-3.1-8b-instant', groq_api_key=groq_api_key)
 def extract_text_from_pdf(pdf_file):
     pdf_reader = PyPDF2.PdfReader(pdf_file)
     text = ""
@@ -21,36 +22,39 @@ def extract_text_from_pdf(pdf_file):
 def chunk_text(text):
     # Split `text` into pieces and return them as a list of Document objects.
     # Lines with a leading "-" are the pre-commit side of this diff view.
     # Piece length is measured with len (length_function=len).
     text_splitter = RecursiveCharacterTextSplitter(
-        chunk_size=4000,
-        chunk_overlap=400,
         length_function=len
     )
     chunks = text_splitter.split_text(text)
     # One Document per piece, with the piece stored as page_content.
     return [Document(page_content=chunk) for chunk in chunks]
-def summarize_chunks(chunks):
-    # Prompt for the initial summarization of each chunk
-    map_prompt_template = """Write a detailed summary of the following text:
-    "{text}"
-    DETAILED SUMMARY:"""
     map_prompt = PromptTemplate(template=map_prompt_template, input_variables=["text"])
-    # Prompt for combining the summaries
-    combine_prompt_template = """Write a comprehensive summary of the following text, capturing key points and main ideas:
-    "{text}"
-    COMPREHENSIVE SUMMARY:"""
     combine_prompt = PromptTemplate(template=combine_prompt_template, input_variables=["text"])
-    # Check the total length of the chunks
     total_length = sum(len(chunk.page_content) for chunk in chunks)
-    if total_length < 10000:  # For shorter documents
         chain = load_summarize_chain(
-            llm,
-            chain_type="stuff",
             prompt=combine_prompt
         )
-    else:  # For longer documents
         chain = load_summarize_chain(
             llm,
             chain_type="map_reduce",
@@ -58,59 +62,61 @@ def summarize_chunks(chunks):
             combine_prompt=combine_prompt,
             verbose=True
         )
     summary = chain.run(chunks)
     return summary
-def summarize_content(pdf_file, text_input):
     # Handler wired to the "Generate Summary" button (see submit_btn.click).
     # Returns the summary string, or a prompt message when there is no input.
     # Guard: nothing uploaded and the text box is empty/None.
     if pdf_file is None and not text_input:
         return "Please upload a PDF file or enter text to summarize."
     # When both a PDF and text are supplied, the PDF wins and the text is ignored.
     if pdf_file is not None:
         # Extract text from PDF
         text = extract_text_from_pdf(pdf_file)
     else:
         # Use the input text
         text = text_input
     # Chunk the text
     chunks = chunk_text(text)
-    # Summarize chunks
-    final_summary = summarize_chunks(chunks)
     return final_summary
 with gr.Blocks(theme=gr.themes.Soft()) as iface:
     # Page header, written as Markdown.
     gr.Markdown(
-    """
-    # PDF And Text Summarizer
-    ### Advanced PDF and Text Summarization -
-    Upload your PDF document or enter text directly, and let AI generate a concise, informative summary.
-    """
     )
     # One row with two columns: inputs (scale=1) and the summary box (scale=2).
     with gr.Row():
         with gr.Column(scale=1):
             input_pdf = gr.File(label="Upload PDF (optional)", file_types=[".pdf"])
             input_text = gr.Textbox(label="Or enter text here", lines=5, placeholder="Paste or type your text here...")
             submit_btn = gr.Button("Generate Summary", variant="primary")
         with gr.Column(scale=2):
             output = gr.Textbox(label="Generated Summary", lines=10)
     # Usage instructions, written as Markdown.
     gr.Markdown(
-    """
-    ### How it works
-    1. Upload a PDF file or enter text directly
-    2. Click "Generate Summary"
-    3. Wait for the AI to process and summarize your content
-    4. Review the generated summary
-    *Powered by LLAMA 3.1 8B model and LangChain*
-    """
     )
     # Clicking the button calls summarize_content(input_pdf, input_text)
     # and writes its return value into `output`.
-    submit_btn.click(summarize_content, inputs=[input_pdf, input_text], outputs=output)
 # Runs at import time: there is no `if __name__ == "__main__"` guard.
 iface.launch()

 # Set up LLM
 # groq_api_key comes from os.environ.get('GROQ_API_KEY') (shown in the
 # first hunk header of this diff), so it is None when the variable is unset.
 # NOTE(review): no check for a missing key is visible here - confirm how
 # ChatGroq behaves when groq_api_key is None.
 llm = ChatGroq(temperature=0, model_name='llama-3.1-8b-instant', groq_api_key=groq_api_key)
 def extract_text_from_pdf(pdf_file):
     pdf_reader = PyPDF2.PdfReader(pdf_file)
     text = ""
 def chunk_text(text):
     # Split `text` into pieces and return them as a list of Document objects.
     # Lines with a leading "+" are the post-commit side of this diff view.
     # Pieces are sized at 4000 with an overlap of 400, measured with len.
     text_splitter = RecursiveCharacterTextSplitter(
+        chunk_size=4000,
+        chunk_overlap=400,
         length_function=len
     )
     chunks = text_splitter.split_text(text)
     # One Document per piece, with the piece stored as page_content.
     return [Document(page_content=chunk) for chunk in chunks]
+def summarize_chunks(chunks, conciseness):
+    # Adjust the prompts based on the conciseness level
+    map_prompt_template = f"""Write a {'very concise' if conciseness > 0.7 else 'detailed'} summary of the following text, focusing on the {'most crucial' if conciseness > 0.7 else 'key'} points:
+    "{{text}}"
+    {'CONCISE' if conciseness > 0.7 else 'DETAILED'} SUMMARY:"""
+    combine_prompt_template = f"""Write a {'highly condensed' if conciseness > 0.7 else 'comprehensive'} summary of the following text, capturing the {'essential' if conciseness > 0.7 else 'key'} points and main ideas:
+    "{{text}}"
+    {'CONDENSED' if conciseness > 0.7 else 'COMPREHENSIVE'} SUMMARY:"""
     map_prompt = PromptTemplate(template=map_prompt_template, input_variables=["text"])
     combine_prompt = PromptTemplate(template=combine_prompt_template, input_variables=["text"])
+    # Adjust the chain type based on the document length and conciseness
     total_length = sum(len(chunk.page_content) for chunk in chunks)
+    if total_length < 10000 or conciseness > 0.8:
         chain = load_summarize_chain(
+            llm,
+            chain_type="stuff",
             prompt=combine_prompt
         )
+    else:
         chain = load_summarize_chain(
             llm,
             chain_type="map_reduce",
             combine_prompt=combine_prompt,
             verbose=True
         )
     summary = chain.run(chunks)
     return summary
+def summarize_content(pdf_file, text_input, conciseness):
     # Handler wired to the "Generate Summary" button (see submit_btn.click).
     # `conciseness` is the slider value (0 to 1 in 0.1 steps) and is passed
     # through unchanged to summarize_chunks.
     # Returns the summary string, or a prompt message when there is no input.
     # Guard: nothing uploaded and the text box is empty/None.
     if pdf_file is None and not text_input:
         return "Please upload a PDF file or enter text to summarize."
     # When both a PDF and text are supplied, the PDF wins and the text is ignored.
     if pdf_file is not None:
         # Extract text from PDF
         text = extract_text_from_pdf(pdf_file)
     else:
         # Use the input text
         text = text_input
     # Chunk the text
     chunks = chunk_text(text)
+    # Summarize chunks with conciseness level
+    final_summary = summarize_chunks(chunks, conciseness)
     return final_summary
 with gr.Blocks(theme=gr.themes.Soft()) as iface:
     # Page header, written as Markdown.
     gr.Markdown(
+        """
+        # PDF And Text Summarizer
+        ### Advanced PDF and Text Summarization with Conciseness Control
+        - Upload your PDF document or enter text directly, adjust the conciseness level, and let AI generate a summary.
+        """
     )
     # One row with two columns: inputs (scale=1) and the summary box (scale=2).
     with gr.Row():
         with gr.Column(scale=1):
             input_pdf = gr.File(label="Upload PDF (optional)", file_types=[".pdf"])
             input_text = gr.Textbox(label="Or enter text here", lines=5, placeholder="Paste or type your text here...")
             # Slider runs 0..1 in 0.1 steps and defaults to 0.5.
+            conciseness_slider = gr.Slider(minimum=0, maximum=1, value=0.5, step=0.1, label="Conciseness Level")
             submit_btn = gr.Button("Generate Summary", variant="primary")
         with gr.Column(scale=2):
             output = gr.Textbox(label="Generated Summary", lines=10)
     # Usage instructions, written as Markdown.
     gr.Markdown(
+        """
+        ### How it works
+        1. Upload a PDF file or enter text directly
+        2. Adjust the conciseness level:
+           - 0 (Most detailed) to 1 (Most concise)
+        3. Click "Generate Summary"
+        4. Wait for the AI to process and summarize your content
+        5. Review the generated summary
+        *Powered by LLAMA 3.1 8B model and LangChain*
+        """
     )
     # Clicking the button calls
     # summarize_content(input_pdf, input_text, conciseness_slider)
     # and writes its return value into `output`.
+    submit_btn.click(summarize_content, inputs=[input_pdf, input_text, conciseness_slider], outputs=output)
 # Runs at import time: there is no `if __name__ == "__main__"` guard.
 iface.launch()