Spaces:
Runtime error
Runtime error
| import os | |
| import textwrap | |
| from langchain.document_loaders import UnstructuredFileLoader | |
| from langchain.llms import OpenAI | |
| from langchain.text_splitter import CharacterTextSplitter | |
| from langchain.vectorstores import chroma | |
| from langchain.docstore.document import Document | |
| from langchain.chains.summarize import load_summarize_chain | |
| from pypdf import PdfReader | |
| import mimetypes | |
| import validators | |
| import requests | |
| import tempfile | |
| import gradio as gr | |
| import openai | |
# Footer text shown beneath the interface (passed as the `article` argument).
disclaimer = (
    "Due to the input token limits of Language Models. "
    "This app is not applicable to lengthy documents."
)
| # def create_knowledge_base(docs): | |
| # # split into chunks | |
| # text_splitter = CharacterTextSplitter( | |
| # separator="\n", chunk_size=500, chunk_overlap=0, length_function=len | |
| # ) | |
| # chunks = text_splitter.split_documents(docs) | |
| # # Create knowledge base | |
| # document_store = [Document(page_content=text) for text in chunks] | |
| # return document_store | |
def summarize_document(open_ai_key, file_obj):
    """Summarize an uploaded document with an OpenAI completion model.

    The file is loaded with ``UnstructuredFileLoader``, split on newlines
    into chunks of roughly 500 characters, and handed to a LangChain
    ``"stuff"`` summarization chain.

    Args:
        open_ai_key: OpenAI API key entered by the user. ``None``, an empty
            string, or a whitespace-only string is treated as missing.
        file_obj: Uploaded file object whose ``.name`` attribute is the path
            of the file on disk, or ``None`` when nothing was uploaded.

    Returns:
        The summary re-wrapped to 100 columns, or a short human-readable
        message when the API key or the file is missing.
    """
    # An empty textbox yields "" rather than None, so test truthiness.
    api_key = open_ai_key.strip() if open_ai_key else ""
    if not api_key:
        return "You forgot OpenAI API key"
    if file_obj is None:
        return "Please upload a document file"

    loader = UnstructuredFileLoader(file_obj.name, strategy="fast")
    docs = loader.load()

    # Split into chunks. split_documents() already returns Document objects,
    # so they can be passed to the chain without re-wrapping.
    text_splitter = CharacterTextSplitter(
        separator="\n", chunk_size=500, chunk_overlap=0, length_function=len
    )
    document_store = text_splitter.split_documents(docs)

    # Pass the key to the client directly instead of writing it into
    # os.environ: the environment is process-wide and would be shared by
    # every concurrent user of the app.
    llm_model = OpenAI(
        model_name="text-davinci-003",
        temperature=0.4,
        openai_api_key=api_key,
    )
    summarization_chain = load_summarize_chain(llm=llm_model, chain_type="stuff")
    response = summarization_chain.run(document_store)

    output = textwrap.fill(response, width=100)
    print(output)
    return output
if __name__ == "__main__":
    # Input widgets, listed in the positional order of summarize_document's
    # parameters: first the API key, then the uploaded file.
    api_key_box = gr.Textbox(
        label='API key:',
        show_label=True,
        placeholder="Input your OpenAI key here",
    )
    document_picker = gr.File(
        label="Upload Document File",
        file_types=[".txt", ".pdf", ".doc", ".docx"],
    )

    # Assemble the web application around the summarizer function.
    app = gr.Interface(
        fn=summarize_document,
        inputs=[api_key_box, document_picker],
        outputs="text",
        title="Summarize My Document📄",
        description="Document Summarizer📄: is a tool that helps you summarize a document.\nUpload a .PDF from your computer, click the `Upload a File` button.\n\nThis application uses `davinci` as the Language Model, don't forget to copy/paste your OpenAI API key.",
        article=disclaimer,
    )

    # Start serving the interface.
    app.launch()