import gradio as gr from pydantic import NoneStr import os from langchain.chains.question_answering import load_qa_chain from langchain.document_loaders import UnstructuredFileLoader from langchain.embeddings.openai import OpenAIEmbeddings from langchain.llms import OpenAI from langchain.text_splitter import CharacterTextSplitter from langchain.vectorstores import FAISS from pypdf import PdfReader import mimetypes import validators import requests import tempfile import openai def get_empty_state(): return {"knowledge_base": None} def create_knowledge_base(docs): # split into chunks text_splitter = CharacterTextSplitter( separator="\n", chunk_size=500, chunk_overlap=200, length_function=len ) chunks = text_splitter.split_documents(docs) embeddings = OpenAIEmbeddings() knowledge_base = FAISS.from_documents(chunks, embeddings) return knowledge_base def upload_multiple_urls(urls): all_docs = [] file_paths = [] for url in urls: if validators.url(url): r = requests.get(url) if r.status_code != 200: raise ValueError( "Check the url of your file; returned status code %s" % r.status_code ) content_type = r.headers.get("content-type") file_extension = mimetypes.guess_extension(content_type) temp_file = tempfile.NamedTemporaryFile(suffix=file_extension, delete=False) temp_file.write(r.content) file_path = temp_file.name loader = UnstructuredFileLoader(file_path, strategy="fast") docs = loader.load() all_docs.extend(docs) file_paths.append(file_path) else: raise ValueError("Please enter a valid URL") knowledge_base = create_knowledge_base(all_docs) return file_paths, {"knowledge_base": knowledge_base} def answer_question(question, state): try: knowledge_base = state["knowledge_base"] docs = knowledge_base.similarity_search(question) llm = OpenAI(temperature=0.4) chain = load_qa_chain(llm, chain_type="stuff") response = chain.run(input_documents=docs, question=question) return response except: return "Please upload Proper Document" def process_files(urls, question): file_output, state = upload_multiple_urls([url.strip() for url in urls.split(',')]) answer = answer_question(question, state) return answer with gr.Blocks(css="style.css",theme=gr.themes.Soft()) as demo: with gr.Row(): gr.HTML("""
