Spaces:

richardprobe
/

pdf_upload

Build error

Richard Hsu committed on Jul 26, 2024

Commit

625af68

1 Parent(s): f5be035

push

Files changed (3) hide show

.env ADDED Viewed

	@@ -0,0 +1 @@


1	+ OPENAI_API_KEY=<REDACTED: secret committed to the repository; revoke and rotate this key>

app.py CHANGED Viewed

@@ -1,7 +1,14 @@
 import gradio as gr
 from langchain.document_loaders import PyPDFLoader
-def extract_text_from_pdf(pdf_file):
     # Load the PDF file using PyPDFLoader
     loader = PyPDFLoader(pdf_file.name)
     documents = loader.load()
@@ -11,15 +18,25 @@ def extract_text_from_pdf(pdf_file):
     for document in documents:
         text += document.page_content
-    return text
 # Create a Gradio interface
 interface = gr.Interface(
-    fn=extract_text_from_pdf,
     inputs=gr.File(label="Upload PDF"),
-    outputs=gr.Textbox(label="Extracted Text"),
-    title="PDF Text Extractor",
-    description="Upload a PDF file to extract and display its text content."
 )
 # Launch the interface

 import gradio as gr
 from langchain.document_loaders import PyPDFLoader
+from langchain.chains.summarize import load_summarize_chain
+from langchain.llms import OpenAI
+from dotenv import load_dotenv
+import os
+# Load environment variables from .env file
+load_dotenv()
+def extract_text_and_summary_from_pdf(pdf_file):
     # Load the PDF file using PyPDFLoader
     loader = PyPDFLoader(pdf_file.name)
     documents = loader.load()
     for document in documents:
         text += document.page_content
+    # Initialize the OpenAI model with the API key from environment variables
+    openai_api_key = os.getenv("OPENAI_API_KEY")
+    llm = OpenAI(model="gpt-3.5-turbo", temperature=0, api_key=openai_api_key)
+    # Load the summarization chain
+    summarize_chain = load_summarize_chain(llm)
+    # Get the summary of the text
+    summary = summarize_chain.run(text)
+    return text, summary
 # Create a Gradio interface
 interface = gr.Interface(
+    fn=extract_text_and_summary_from_pdf,
     inputs=gr.File(label="Upload PDF"),
+    outputs=[gr.Textbox(label="Extracted Text"), gr.Textbox(label="Summary")],
+    title="PDF Text Extractor and Summarizer",
+    description="Upload a PDF file to extract and display its text content and summary."
 )
 # Launch the interface

requirements.txt CHANGED Viewed

@@ -67,4 +67,6 @@ uvicorn==0.22.0
 websockets==11.0.3
 yarl==1.9.2
 pypdf==3.10.0
-pypdf2

 websockets==11.0.3
 yarl==1.9.2
 pypdf==3.10.0
+pypdf2
+python-dotenv
+openai