Karthikeyan committed on
Commit
4329768
·
1 Parent(s): 2c0ee31

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +89 -0
app.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from pydantic import NoneStr
3
+ import os
4
+ from langchain.chains.question_answering import load_qa_chain
5
+ from langchain.document_loaders import UnstructuredFileLoader
6
+ from langchain.embeddings.openai import OpenAIEmbeddings
7
+ from langchain.llms import OpenAI
8
+ from langchain.text_splitter import CharacterTextSplitter
9
+ from langchain.vectorstores import FAISS
10
+ from pypdf import PdfReader
11
+ import mimetypes
12
+ import validators
13
+ import requests
14
+ import tempfile
15
+ import openai
16
+
17
def get_empty_state():
    """Return a new state dict whose ``"knowledge_base"`` entry is ``None``."""
    return dict(knowledge_base=None)
19
+
20
def create_knowledge_base(docs):
    """Chunk *docs* and build a FAISS vector store from the chunks.

    The documents are split on newlines into chunks of at most 500
    characters (measured with ``len``) with a 200-character overlap, then
    handed to ``FAISS.from_documents`` together with an ``OpenAIEmbeddings``
    instance.

    Args:
        docs: Documents accepted by ``CharacterTextSplitter.split_documents``.

    Returns:
        The vector store returned by ``FAISS.from_documents``.
    """
    splitter_options = {
        "separator": "\n",
        "chunk_size": 500,
        "chunk_overlap": 200,
        "length_function": len,
    }
    pieces = CharacterTextSplitter(**splitter_options).split_documents(docs)
    return FAISS.from_documents(pieces, OpenAIEmbeddings())
29
+
30
def upload_multiple_urls(urls):
    """Download every URL to a temporary file, load it, and index the result.

    Args:
        urls: Iterable of URL strings.

    Returns:
        A ``(file_paths, state)`` tuple. ``file_paths`` lists the temporary
        files that were written (they are created with ``delete=False`` and
        are not removed here); ``state`` is a dict whose ``"knowledge_base"``
        entry is the value returned by ``create_knowledge_base``.

    Raises:
        ValueError: If a URL fails validation or the server does not answer
            with HTTP 200.
    """
    all_docs = []
    file_paths = []
    for url in urls:
        if not validators.url(url):
            raise ValueError("Please enter a valid URL")

        # A timeout keeps one unresponsive host from hanging the request forever.
        r = requests.get(url, timeout=30)
        if r.status_code != 200:
            raise ValueError(
                f"Check the url of your file; returned status code {r.status_code}"
            )

        # Headers such as "text/html; charset=utf-8" carry parameters that
        # mimetypes does not understand, and the header may be absent entirely.
        content_type = (r.headers.get("content-type") or "").split(";", 1)[0].strip()
        file_extension = mimetypes.guess_extension(content_type) if content_type else None

        # Close the file before loading it so the content is flushed to disk
        # and the handle is not leaked; delete=False keeps it for the caller.
        with tempfile.NamedTemporaryFile(suffix=file_extension, delete=False) as temp_file:
            temp_file.write(r.content)
            file_path = temp_file.name

        loader = UnstructuredFileLoader(file_path, strategy="fast")
        all_docs.extend(loader.load())
        file_paths.append(file_path)

    knowledge_base = create_knowledge_base(all_docs)
    return file_paths, {"knowledge_base": knowledge_base}
53
+
54
def answer_question(question, state):
    """Answer *question* from the knowledge base stored in *state*.

    Args:
        question: The question text.
        state: Dict holding the vector store under ``"knowledge_base"``.

    Returns:
        The answer produced by the QA chain, or the fixed message
        ``"Please upload Proper Document"`` when anything goes wrong
        (missing/empty state, search failure, LLM failure).
    """
    import logging

    try:
        knowledge_base = state["knowledge_base"]
        docs = knowledge_base.similarity_search(question)

        llm = OpenAI(temperature=0.4)
        chain = load_qa_chain(llm, chain_type="stuff")
        return chain.run(input_documents=docs, question=question)
    except Exception:
        # Keep the user-facing fallback, but do not swallow KeyboardInterrupt /
        # SystemExit, and record the real cause for whoever reads the logs.
        logging.getLogger(__name__).exception("Failed to answer question")
        return "Please upload Proper Document"
65
+
66
def process_files(urls, question):
    """Index the comma-separated *urls* and answer *question* from them.

    Args:
        urls: One or more URLs separated by commas. Surrounding whitespace
            and empty segments (e.g. a trailing comma) are ignored.
        question: The question text.

    Returns:
        The value returned by ``answer_question``.

    Raises:
        ValueError: If no URL is left after dropping empty segments, or if
            ``upload_multiple_urls`` rejects one of them.
    """
    url_list = [part.strip() for part in urls.split(",") if part.strip()]
    if not url_list:
        # Same message upload_multiple_urls raises for an invalid entry.
        raise ValueError("Please enter a valid URL")

    # The temporary file paths are not needed here, only the state.
    _, state = upload_multiple_urls(url_list)
    return answer_question(question, state)
70
+
71
# Gradio Blocks UI: two header logos, a page heading, a "URL" textbox, a
# "Question" textbox with an "Enter" button, and an "Answer" textbox.
# NOTE(review): indentation was lost in this view, so how the Row/Column
# contexts nest inside each other cannot be confirmed from here.
+ with gr.Blocks(css="style.css",theme=gr.themes.Soft()) as demo:
72
+ with gr.Row():
73
+ gr.HTML("""<img class="leftimage" align="left" src="https://companieslogo.com/img/orig/RAND.AS_BIG-0f1935a4.png?t=1651813778" alt="Image" width="210" height="210">
74
+ <img class="rightimage" align="right" src="https://workllama.com/wp-content/uploads/2022/05/WL_Logo.svg" alt="Image" width="210" height="210">""")
75
+
76
# NOTE(review): "Mulit" in the heading below looks like a typo for "Multi".
+ with gr.Row():
77
+ gr.HTML("""<center><h1>Mulit-URL QA</h1></center>""")
78
+ with gr.Row():
79
+ url = gr.Textbox(label="URL")
80
+ with gr.Row():
81
+ Question = gr.Textbox(label="Question")
82
+ with gr.Column(scale=0.30):
83
+ enter_btn = gr.Button(value="Enter").style(height=100)
84
+ with gr.Row():
85
+ Output = gr.Textbox(label="Answer")
86
+
87
# Clicking "Enter" calls process_files with the URL and Question values and
# puts its return value in the Answer textbox.
+ enter_btn.click(process_files,[url,Question],Output)
88
# Set the title on the Blocks object, then launch it.
+ demo.title = "URL QA"
89
+ demo.launch()