"""Gradio web app that summarizes an uploaded document with an OpenAI LLM.

The document is loaded with LangChain's ``UnstructuredFileLoader``, split
into small chunks and summarized in one pass by a "stuff" summarization
chain.
"""

import mimetypes
import os
import tempfile
import textwrap

import gradio as gr
import openai
import requests
import validators
from langchain.chains.summarize import load_summarize_chain
from langchain.docstore.document import Document
from langchain.document_loaders import UnstructuredFileLoader
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import chroma
from pypdf import PdfReader

disclaimer = """Due to the input token limits of Language Models. This app is not applicable to lengthy documents."""


def _split_into_chunks(docs):
    """Split loaded documents into newline-separated chunks of ~500 characters."""
    text_splitter = CharacterTextSplitter(
        separator="\n", chunk_size=500, chunk_overlap=0, length_function=len
    )
    # split_documents already returns Document objects, so the result can be
    # handed to the summarization chain without re-wrapping.
    return text_splitter.split_documents(docs)


def summarize_document(open_ai_key, file_obj):
    """Summarize the uploaded document and return the summary as text.

    Args:
        open_ai_key: OpenAI API key typed by the user. Gradio submits an
            empty string when the textbox is left blank.
        file_obj: Upload object provided by ``gr.File``; its ``name``
            attribute is used as the path of the file to load. ``None``
            when nothing was uploaded.

    Returns:
        The summary wrapped to 100 columns, or a short message explaining
        which input is missing.
    """
    # An empty or whitespace-only textbox must count as "no key".
    if not open_ai_key or not open_ai_key.strip():
        return "You forgot OpenAI API key"
    if file_obj is None:
        return "You forgot to upload a document"

    loader = UnstructuredFileLoader(file_obj.name, strategy="fast")
    chunks = _split_into_chunks(loader.load())
    if not chunks:
        return "No text could be extracted from the document"

    # The key is passed to the model wrapper rather than written to
    # os.environ, so one user's key is not left behind for later requests.
    # NOTE(review): "text-davinci-003" may no longer be served by OpenAI;
    # confirm and switch models if needed (the UI description mentions it).
    llm_model = OpenAI(
        model_name="text-davinci-003",
        temperature=0.4,
        openai_api_key=open_ai_key.strip(),
    )
    summarization_chain = load_summarize_chain(llm=llm_model, chain_type="stuff")
    response = summarization_chain.run(chunks)

    output = textwrap.fill(response, width=100)
    print(output)
    return output


def _build_interface():
    """Create the Gradio interface that wires the inputs to summarize_document."""
    return gr.Interface(
        fn=summarize_document,
        inputs=[
            gr.Textbox(
                label='API key:',
                show_label=True,
                placeholder="Input your OpenAI key here",
            ),
            gr.File(
                label="Upload Document File",
                file_types=[".txt", ".pdf", ".doc", ".docx"],
            ),
        ],
        outputs="text",
        title="Summarize My Document📄",
        description="Document Summarizer📄: is a tool that helps you summarize a document.\nUpload a .PDF from your computer, click the `Upload a File` button.\n\nThis application uses `davinci` as the Language Model, don't forget to copy/paste your OpenAI API key.",
        article=disclaimer,
    )


if __name__ == "__main__":
    # Build and launch the web app.
    app = _build_interface()
    app.launch()