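# NOTE(review): the next three lines look like file-viewer residue (avatar
# caption, commit message, commit hash); kept as comments so the module parses.
# AIfenaike's picture
# Update app.py
# a19ed50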
import os
import textwrap
from langchain.document_loaders import UnstructuredFileLoader
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import chroma
from langchain.docstore.document import Document
from langchain.chains.summarize import load_summarize_chain
from pypdf import PdfReader
import mimetypes
import validators
import requests
import tempfile
import gradio as gr
import openai
# Caveat about document length; handed to the Gradio interface as its
# ``article`` text.
disclaimer = (
    "Due to the input token limits of Language Models. "
    "This app is not applicable to lengthy documents."
)
# def create_knowledge_base(docs):
# # split into chunks
# text_splitter = CharacterTextSplitter(
# separator="\n", chunk_size=500, chunk_overlap=0, length_function=len
# )
# chunks = text_splitter.split_documents(docs)
# # Create knowledge base
# document_store = [Document(page_content=text) for text in chunks]
# return document_store
def summarize_document(open_ai_key, file_obj):
    """Summarize an uploaded document with an OpenAI completion model.

    Args:
        open_ai_key: OpenAI API key supplied by the user. ``None``, an empty
            string or a whitespace-only string is treated as missing.
        file_obj: Uploaded file object whose ``name`` attribute is the path
            of the file to load, or ``None`` when nothing was uploaded.

    Returns:
        The summary re-wrapped to 100 columns, or a short message telling
        the user which input is missing.
    """
    # The key comes from a text box, which may deliver an empty string
    # rather than None, so test for usable content instead of identity.
    if not open_ai_key or not open_ai_key.strip():
        return "You forgot OpenAI API key"
    if file_obj is None:
        return "Please upload a document to summarize."

    loader = UnstructuredFileLoader(file_obj.name, strategy="fast")
    docs = loader.load()

    # Split into small newline-delimited chunks. split_documents() already
    # returns Document objects, so they are passed to the chain as they are
    # instead of being wrapped in Document a second time.
    text_splitter = CharacterTextSplitter(
        separator="\n", chunk_size=500, chunk_overlap=0, length_function=len
    )
    chunks = text_splitter.split_documents(docs)

    # Hand the key to the LLM object directly rather than writing it into
    # os.environ, where it would outlive this request and be visible to
    # every other caller in the same process.
    llm_model = OpenAI(
        model_name="text-davinci-003",
        temperature=0.4,
        openai_api_key=open_ai_key.strip(),
    )
    summarization_chain = load_summarize_chain(llm=llm_model, chain_type="stuff")

    # The original passed an undefined name (`vector_store`) here.
    response = summarization_chain.run(chunks)

    output = textwrap.fill(response, width=100)
    print(output)
    return output
if __name__ == "__main__":
    # Input widgets, listed in the order summarize_document takes its
    # arguments: the API key first, then the uploaded file.
    api_key_box = gr.Textbox(
        label='API key:',
        show_label=True,
        placeholder="Input your OpenAI key here",
    )
    document_picker = gr.File(
        label="Upload Document File",
        file_types=[".txt", ".pdf", ".doc", ".docx"],
    )

    # Wrap the summarizer in a Gradio interface with a plain-text output.
    app = gr.Interface(
        fn=summarize_document,
        inputs=[api_key_box, document_picker],
        outputs="text",
        title="Summarize My Document📄",
        description="Document Summarizer📄: is a tool that helps you summarize a document.\nUpload a .PDF from your computer, click the `Upload a File` button.\n\nThis application uses `davinci` as the Language Model, don't forget to copy/paste your OpenAI API key.",
        article=disclaimer,
    )

    # Start serving the web app.
    app.launch()