Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -3,7 +3,7 @@ import os
|
|
| 3 |
api_token = os.getenv("HF_TOKEN")
|
| 4 |
|
| 5 |
from langchain_community.vectorstores import FAISS
|
| 6 |
-
from langchain_community.document_loaders import
|
| 7 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 8 |
from langchain.chains import ConversationalRetrievalChain
|
| 9 |
from langchain_community.embeddings import HuggingFaceEmbeddings
|
|
@@ -35,17 +35,13 @@ Chat History: {chat_history}
|
|
| 35 |
Craft the response as a seamless, thorough, and authoritative explanation that naturally integrates all aspects of the query.
|
| 36 |
"""
|
| 37 |
|
| 38 |
-
# Load and split documents
|
| 39 |
def load_doc(list_file_path):
|
| 40 |
pages = []
|
| 41 |
for file_path in list_file_path:
|
| 42 |
-
if file_path.endswith('.
|
| 43 |
-
loader = PyPDFLoader(file_path)
|
| 44 |
-
elif file_path.endswith('.txt'):
|
| 45 |
loader = TextLoader(file_path)
|
| 46 |
-
|
| 47 |
-
continue
|
| 48 |
-
pages.extend(loader.load())
|
| 49 |
|
| 50 |
text_splitter = RecursiveCharacterTextSplitter(
|
| 51 |
chunk_size=1024,
|
|
@@ -99,7 +95,7 @@ def initialize_database(list_file_obj, progress=gr.Progress()):
|
|
| 99 |
list_file_path = [x.name for x in list_file_obj if x is not None]
|
| 100 |
doc_splits = load_doc(list_file_path)
|
| 101 |
vector_db = create_db(doc_splits)
|
| 102 |
-
return vector_db, "
|
| 103 |
|
| 104 |
# Initialize LLM
|
| 105 |
def initialize_LLM(llm_option, llm_temperature, max_tokens, top_k, vector_db, progress=gr.Progress()):
|
|
@@ -126,7 +122,7 @@ def conversation(qa_chain, message, history):
|
|
| 126 |
for i in range(3):
|
| 127 |
if i < len(response_sources):
|
| 128 |
sources_content.append(response_sources[i].page_content.strip())
|
| 129 |
-
sources_pages.append(
|
| 130 |
else:
|
| 131 |
sources_content.append("")
|
| 132 |
sources_pages.append(0)
|
|
@@ -141,20 +137,20 @@ def demo():
|
|
| 141 |
with gr.Blocks(theme=gr.themes.Default(primary_hue="red", secondary_hue="pink", neutral_hue="sky")) as demo:
|
| 142 |
vector_db = gr.State()
|
| 143 |
qa_chain = gr.State()
|
| 144 |
-
gr.HTML("<center><h1>RAG Document Chatbot</h1><center>")
|
| 145 |
-
gr.Markdown("""<b>Query your documents!</b> This AI agent performs retrieval augmented generation (RAG) on
|
| 146 |
<b>Please do not upload confidential documents.</b>
|
| 147 |
""")
|
| 148 |
|
| 149 |
with gr.Row():
|
| 150 |
with gr.Column(scale=86):
|
| 151 |
-
gr.Markdown("<b>Step 1 - Upload
|
| 152 |
with gr.Row():
|
| 153 |
document = gr.Files(height=300, file_count="multiple",
|
| 154 |
-
file_types=["
|
| 155 |
-
label="Upload
|
| 156 |
with gr.Row():
|
| 157 |
-
db_btn = gr.Button("Create
|
| 158 |
with gr.Row():
|
| 159 |
db_progress = gr.Textbox(value="Not initialized", show_label=False)
|
| 160 |
|
|
@@ -186,7 +182,7 @@ def demo():
|
|
| 186 |
with gr.Row():
|
| 187 |
doc_source = gr.Textbox(label=f"Reference {i}", lines=2,
|
| 188 |
container=True, scale=20)
|
| 189 |
-
source_page = gr.Number(label="
|
| 190 |
with gr.Row():
|
| 191 |
msg = gr.Textbox(placeholder="Ask a question", container=True)
|
| 192 |
with gr.Row():
|
|
|
|
| 3 |
api_token = os.getenv("HF_TOKEN")
|
| 4 |
|
| 5 |
from langchain_community.vectorstores import FAISS
|
| 6 |
+
from langchain_community.document_loaders import TextLoader
|
| 7 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 8 |
from langchain.chains import ConversationalRetrievalChain
|
| 9 |
from langchain_community.embeddings import HuggingFaceEmbeddings
|
|
|
|
| 35 |
Craft the response as a seamless, thorough, and authoritative explanation that naturally integrates all aspects of the query.
|
| 36 |
"""
|
| 37 |
|
| 38 |
+
# Load and split text documents
|
| 39 |
def load_doc(list_file_path):
|
| 40 |
pages = []
|
| 41 |
for file_path in list_file_path:
|
| 42 |
+
if file_path.endswith('.txt'):
|
|
|
|
|
|
|
| 43 |
loader = TextLoader(file_path)
|
| 44 |
+
pages.extend(loader.load())
|
|
|
|
|
|
|
| 45 |
|
| 46 |
text_splitter = RecursiveCharacterTextSplitter(
|
| 47 |
chunk_size=1024,
|
|
|
|
| 95 |
list_file_path = [x.name for x in list_file_obj if x is not None]
|
| 96 |
doc_splits = load_doc(list_file_path)
|
| 97 |
vector_db = create_db(doc_splits)
|
| 98 |
+
return vector_db, "Text database created!"
|
| 99 |
|
| 100 |
# Initialize LLM
|
| 101 |
def initialize_LLM(llm_option, llm_temperature, max_tokens, top_k, vector_db, progress=gr.Progress()):
|
|
|
|
| 122 |
for i in range(3):
|
| 123 |
if i < len(response_sources):
|
| 124 |
sources_content.append(response_sources[i].page_content.strip())
|
| 125 |
+
sources_pages.append(0) # For text files, we don't have page numbers
|
| 126 |
else:
|
| 127 |
sources_content.append("")
|
| 128 |
sources_pages.append(0)
|
|
|
|
| 137 |
with gr.Blocks(theme=gr.themes.Default(primary_hue="red", secondary_hue="pink", neutral_hue="sky")) as demo:
|
| 138 |
vector_db = gr.State()
|
| 139 |
qa_chain = gr.State()
|
| 140 |
+
gr.HTML("<center><h1>RAG Text Document Chatbot</h1><center>")
|
| 141 |
+
gr.Markdown("""<b>Query your text documents!</b> This AI agent performs retrieval augmented generation (RAG) on TXT documents.
|
| 142 |
<b>Please do not upload confidential documents.</b>
|
| 143 |
""")
|
| 144 |
|
| 145 |
with gr.Row():
|
| 146 |
with gr.Column(scale=86):
|
| 147 |
+
gr.Markdown("<b>Step 1 - Upload Text Files and Initialize RAG pipeline</b>")
|
| 148 |
with gr.Row():
|
| 149 |
document = gr.Files(height=300, file_count="multiple",
|
| 150 |
+
file_types=["txt"], interactive=True,
|
| 151 |
+
label="Upload TXT documents")
|
| 152 |
with gr.Row():
|
| 153 |
+
db_btn = gr.Button("Create text database")
|
| 154 |
with gr.Row():
|
| 155 |
db_progress = gr.Textbox(value="Not initialized", show_label=False)
|
| 156 |
|
|
|
|
| 182 |
with gr.Row():
|
| 183 |
doc_source = gr.Textbox(label=f"Reference {i}", lines=2,
|
| 184 |
container=True, scale=20)
|
| 185 |
+
source_page = gr.Number(label="Line Range", scale=1, visible=False)
|
| 186 |
with gr.Row():
|
| 187 |
msg = gr.Textbox(placeholder="Ask a question", container=True)
|
| 188 |
with gr.Row():
|