File size: 3,197 Bytes
4329768
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9cbec28
4329768
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import gradio as gr
from pydantic import NoneStr
import os
from langchain.chains.question_answering import load_qa_chain
from langchain.document_loaders import UnstructuredFileLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from pypdf import PdfReader
import mimetypes
import validators
import requests
import tempfile
import openai

def get_empty_state():
    """Return a fresh per-session state dict with no knowledge base loaded."""
    state = {}
    state["knowledge_base"] = None
    return state

def create_knowledge_base(docs):
    """Build a FAISS vector store from loaded documents.

    The documents are split into overlapping 500-character chunks
    (200-character overlap, newline separator), embedded with OpenAI
    embeddings, and indexed into FAISS.
    """
    splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=500,
        chunk_overlap=200,
        length_function=len,
    )
    document_chunks = splitter.split_documents(docs)
    return FAISS.from_documents(document_chunks, OpenAIEmbeddings())

def upload_multiple_urls(urls):
    """Download each URL to a temp file, load its content, and build one
    knowledge base over all the resulting documents.

    Parameters
    ----------
    urls : iterable of str
        URLs pointing to downloadable documents.

    Returns
    -------
    tuple
        (list of temp-file paths, state dict with key "knowledge_base")

    Raises
    ------
    ValueError
        If a URL is syntactically invalid or the server does not return
        HTTP 200.
    """
    all_docs = []
    file_paths = []
    for url in urls:
        # Guard clause: fail fast on malformed URLs.
        if not validators.url(url):
            raise ValueError("Please enter a valid URL")
        r = requests.get(url)
        if r.status_code != 200:
            raise ValueError(
                "Check the url of your file; returned status code %s" % r.status_code
            )
        content_type = r.headers.get("content-type")
        # Fix: guess_extension(None) raises AttributeError; a missing
        # content-type header now falls back to no suffix (accepted by
        # NamedTemporaryFile).
        file_extension = (
            mimetypes.guess_extension(content_type) if content_type else None
        )
        # delete=False so the file outlives this handle for the loader;
        # fix: close the handle before loading so the downloaded bytes are
        # actually flushed to disk (the original left it open, so the
        # loader could see a partially written file).
        temp_file = tempfile.NamedTemporaryFile(suffix=file_extension, delete=False)
        try:
            temp_file.write(r.content)
        finally:
            temp_file.close()
        file_path = temp_file.name
        loader = UnstructuredFileLoader(file_path, strategy="fast")
        all_docs.extend(loader.load())
        file_paths.append(file_path)
    knowledge_base = create_knowledge_base(all_docs)
    return file_paths, {"knowledge_base": knowledge_base}

def answer_question(question, state):
    """Answer *question* against the knowledge base stored in *state*.

    Retrieves the most similar chunks via FAISS similarity search and runs
    a "stuff" QA chain over them with an OpenAI LLM (temperature 0.4).

    Parameters
    ----------
    question : str
        The user's question.
    state : dict or None
        Session state; expected to hold a vector store under
        "knowledge_base".

    Returns
    -------
    str
        The chain's answer, or the fallback message when no knowledge
        base is available or the chain fails.
    """
    # Explicit guard instead of relying on KeyError/TypeError being caught.
    knowledge_base = state.get("knowledge_base") if state else None
    if knowledge_base is None:
        return "Please upload Proper Document"
    try:
        docs = knowledge_base.similarity_search(question)
        llm = OpenAI(temperature=0.4)
        chain = load_qa_chain(llm, chain_type="stuff")
        return chain.run(input_documents=docs, question=question)
    except Exception:
        # Fix: was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt. Still best-effort: API/chain errors become
        # the same user-facing fallback message as before.
        return "Please upload Proper Document"

def process_files(urls, question):
    """Split the comma-separated *urls* string, build a knowledge base
    from the downloaded documents, and answer *question* against it."""
    url_list = [entry.strip() for entry in urls.split(',')]
    _, state = upload_multiple_urls(url_list)
    return answer_question(question, state)

# Gradio UI: a multi-URL question-answering front end.
# Layout: logo row, title row, a URL input, a question input with an
# "Enter" button, and an answer output. Clicking the button runs
# process_files(url, Question) and writes the result to Output.
with gr.Blocks(css="style.css",theme=gr.themes.Soft()) as demo:
    # Header row with a company logo served from an external CDN.
    with gr.Row():
       gr.HTML("""<center><img class="" align="" src="https://companieslogo.com/img/orig/RAND.AS_BIG-0f1935a4.png?t=1651813778" alt="Image" width="210" height="210"></center>""")

    # Page title (NOTE(review): "Mulit-URL" looks like a typo for
    # "Multi-URL", but it is a runtime string, so it is left unchanged here).
    with gr.Row():
      gr.HTML("""<center><h1>Mulit-URL QA</h1></center>""")
    # Comma-separated URLs are expected here (process_files splits on ',').
    with gr.Row():
      url = gr.Textbox(label="URL")
    with gr.Row():
      Question = gr.Textbox(label="Question")
      # NOTE(review): float scale and .style(height=...) are accepted only by
      # older Gradio releases (removed in Gradio 4.x) — confirm the pinned
      # gradio version before upgrading.
      with gr.Column(scale=0.30):
        enter_btn = gr.Button(value="Enter").style(height=100)
    with gr.Row():
      Output = gr.Textbox(label="Answer")

    # Wire the button: inputs [url, Question] -> process_files -> Output.
    enter_btn.click(process_files,[url,Question],Output)
demo.title = "URL QA"
demo.launch()