Spaces:

girishwangikar
/

PDF_And_Text_Summarizer

Sleeping

App Files Files Community

girishwangikar committed on Aug 24, 2024

Commit

a0f19e1

verified ·

1 Parent(s): 3600dcc

Create app.py

Browse files

Files changed (1) hide show

app.py +120 -0

app.py ADDED Viewed

	@@ -0,0 +1,120 @@

+import os
+from google.colab import userdata
+import gradio as gr
+from langchain_groq import ChatGroq
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.chains.summarize import load_summarize_chain
+from langchain.docstore.document import Document
+import PyPDF2
+from langchain.prompts import PromptTemplate
+# Set up API keys
+hf_api_key = userdata.get('HF_TOKEN')
+groq_api_key = userdata.get('GROQ_API_KEY')
+os.environ['HF_TOKEN'] = hf_api_key
+os.environ['GROQ_API_KEY'] = groq_api_key
+# Set up LLM
+llm = ChatGroq(temperature=0, model_name='llama-3.1-8b-instant', groq_api_key=groq_api_key)
+def extract_text_from_pdf(pdf_file):
+    pdf_reader = PyPDF2.PdfReader(pdf_file)
+    text = ""
+    for page in pdf_reader.pages:
+        text += page.extract_text()
+    return text
+def chunk_text(text):
+    text_splitter = RecursiveCharacterTextSplitter(
+        chunk_size=4000,
+        chunk_overlap=400,
+        length_function=len
+    )
+    chunks = text_splitter.split_text(text)
+    return [Document(page_content=chunk) for chunk in chunks]
+def summarize_chunks(chunks):
+    # Prompt for the initial summarization of each chunk
+    map_prompt_template = """Write a detailed summary of the following text:
+    "{text}"
+    DETAILED SUMMARY:"""
+    map_prompt = PromptTemplate(template=map_prompt_template, input_variables=["text"])
+    # Prompt for combining the summaries
+    combine_prompt_template = """Write a comprehensive summary of the following text, capturing key points and main ideas:
+    "{text}"
+    COMPREHENSIVE SUMMARY:"""
+    combine_prompt = PromptTemplate(template=combine_prompt_template, input_variables=["text"])
+    # Check the total length of the chunks
+    total_length = sum(len(chunk.page_content) for chunk in chunks)
+    if total_length < 10000:  # For shorter documents
+        chain = load_summarize_chain(
+            llm,
+            chain_type="stuff",
+            prompt=combine_prompt
+        )
+    else:  # For longer documents
+        chain = load_summarize_chain(
+            llm,
+            chain_type="map_reduce",
+            map_prompt=map_prompt,
+            combine_prompt=combine_prompt,
+            verbose=True
+        )
+    summary = chain.run(chunks)
+    return summary
+def summarize_content(pdf_file, text_input):
+    if pdf_file is None and not text_input:
+        return "Please upload a PDF file or enter text to summarize."
+    if pdf_file is not None:
+        # Extract text from PDF
+        text = extract_text_from_pdf(pdf_file)
+    else:
+        # Use the input text
+        text = text_input
+    # Chunk the text
+    chunks = chunk_text(text)
+    # Summarize chunks
+    final_summary = summarize_chunks(chunks)
+    return final_summary
+# Build the Gradio UI: a header, a two-column input/output row, usage notes,
+# and the button wiring. Components appear in the order they are created.
+with gr.Blocks(theme=gr.themes.Soft()) as iface:
+    # Page header
+    gr.Markdown(
+    """
+    # PDF And Text Summarizer
+    ### Advanced PDF and Text Summarization -
+    Upload your PDF document or enter text directly, and let AI generate a concise, informative summary.
+    """
+    )
+    with gr.Row():
+        # Left column (scale 1): the two alternative inputs and the submit button
+        with gr.Column(scale=1):
+            input_pdf = gr.File(label="Upload PDF (optional)", file_types=[".pdf"])
+            input_text = gr.Textbox(label="Or enter text here", lines=5, placeholder="Paste or type your text here...")
+            submit_btn = gr.Button("Generate Summary", variant="primary")
+        # Right column (scale 2): the generated summary
+        with gr.Column(scale=2):
+            output = gr.Textbox(label="Generated Summary", lines=10)
+    # Usage notes shown below the input/output row
+    gr.Markdown(
+    """
+    ### How it works
+    1. Upload a PDF file or enter text directly
+    2. Click "Generate Summary"
+    3. Wait for the AI to process and summarize your content
+    4. Review the generated summary
+    *Powered by LLAMA 3.1 8B model and LangChain*
+    """
+    )
+    # Clicking the button passes the file and the text box to summarize_content
+    # and writes its return value into the summary box.
+    submit_btn.click(summarize_content, inputs=[input_pdf, input_text], outputs=output)
+# Start the app when the module is executed.
+iface.launch()