PriyaMishra committed on
Commit
dbe6595
·
verified ·
1 Parent(s): 5278dab

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -17
app.py CHANGED
@@ -3,7 +3,7 @@ import os
3
  api_token = os.getenv("HF_TOKEN")
4
 
5
  from langchain_community.vectorstores import FAISS
6
- from langchain_community.document_loaders import PyPDFLoader, TextLoader
7
  from langchain.text_splitter import RecursiveCharacterTextSplitter
8
  from langchain.chains import ConversationalRetrievalChain
9
  from langchain_community.embeddings import HuggingFaceEmbeddings
@@ -35,17 +35,13 @@ Chat History: {chat_history}
35
  Craft the response as a seamless, thorough, and authoritative explanation that naturally integrates all aspects of the query.
36
  """
37
 
38
- # Load and split documents
39
  def load_doc(list_file_path):
40
  pages = []
41
  for file_path in list_file_path:
42
- if file_path.endswith('.pdf'):
43
- loader = PyPDFLoader(file_path)
44
- elif file_path.endswith('.txt'):
45
  loader = TextLoader(file_path)
46
- else:
47
- continue
48
- pages.extend(loader.load())
49
 
50
  text_splitter = RecursiveCharacterTextSplitter(
51
  chunk_size=1024,
@@ -99,7 +95,7 @@ def initialize_database(list_file_obj, progress=gr.Progress()):
99
  list_file_path = [x.name for x in list_file_obj if x is not None]
100
  doc_splits = load_doc(list_file_path)
101
  vector_db = create_db(doc_splits)
102
- return vector_db, "Database created!"
103
 
104
  # Initialize LLM
105
  def initialize_LLM(llm_option, llm_temperature, max_tokens, top_k, vector_db, progress=gr.Progress()):
@@ -126,7 +122,7 @@ def conversation(qa_chain, message, history):
126
  for i in range(3):
127
  if i < len(response_sources):
128
  sources_content.append(response_sources[i].page_content.strip())
129
- sources_pages.append(response_sources[i].metadata.get("page", 0) + 1)
130
  else:
131
  sources_content.append("")
132
  sources_pages.append(0)
@@ -141,20 +137,20 @@ def demo():
141
  with gr.Blocks(theme=gr.themes.Default(primary_hue="red", secondary_hue="pink", neutral_hue="sky")) as demo:
142
  vector_db = gr.State()
143
  qa_chain = gr.State()
144
- gr.HTML("<center><h1>RAG Document Chatbot</h1><center>")
145
- gr.Markdown("""<b>Query your documents!</b> This AI agent performs retrieval augmented generation (RAG) on PDF and TXT documents.
146
  <b>Please do not upload confidential documents.</b>
147
  """)
148
 
149
  with gr.Row():
150
  with gr.Column(scale=86):
151
- gr.Markdown("<b>Step 1 - Upload Documents and Initialize RAG pipeline</b>")
152
  with gr.Row():
153
  document = gr.Files(height=300, file_count="multiple",
154
- file_types=["pdf", "txt"], interactive=True,
155
- label="Upload PDF or TXT documents")
156
  with gr.Row():
157
- db_btn = gr.Button("Create vector database")
158
  with gr.Row():
159
  db_progress = gr.Textbox(value="Not initialized", show_label=False)
160
 
@@ -186,7 +182,7 @@ def demo():
186
  with gr.Row():
187
  doc_source = gr.Textbox(label=f"Reference {i}", lines=2,
188
  container=True, scale=20)
189
- source_page = gr.Number(label="Page", scale=1)
190
  with gr.Row():
191
  msg = gr.Textbox(placeholder="Ask a question", container=True)
192
  with gr.Row():
 
3
  api_token = os.getenv("HF_TOKEN")
4
 
5
  from langchain_community.vectorstores import FAISS
6
+ from langchain_community.document_loaders import TextLoader
7
  from langchain.text_splitter import RecursiveCharacterTextSplitter
8
  from langchain.chains import ConversationalRetrievalChain
9
  from langchain_community.embeddings import HuggingFaceEmbeddings
 
35
  Craft the response as a seamless, thorough, and authoritative explanation that naturally integrates all aspects of the query.
36
  """
37
 
38
+ # Load and split text documents
39
  def load_doc(list_file_path):
40
  pages = []
41
  for file_path in list_file_path:
42
+ if file_path.endswith('.txt'):
 
 
43
  loader = TextLoader(file_path)
44
+ pages.extend(loader.load())
 
 
45
 
46
  text_splitter = RecursiveCharacterTextSplitter(
47
  chunk_size=1024,
 
95
  list_file_path = [x.name for x in list_file_obj if x is not None]
96
  doc_splits = load_doc(list_file_path)
97
  vector_db = create_db(doc_splits)
98
+ return vector_db, "Text database created!"
99
 
100
  # Initialize LLM
101
  def initialize_LLM(llm_option, llm_temperature, max_tokens, top_k, vector_db, progress=gr.Progress()):
 
122
  for i in range(3):
123
  if i < len(response_sources):
124
  sources_content.append(response_sources[i].page_content.strip())
125
+ sources_pages.append(0) # For text files, we don't have page numbers
126
  else:
127
  sources_content.append("")
128
  sources_pages.append(0)
 
137
  with gr.Blocks(theme=gr.themes.Default(primary_hue="red", secondary_hue="pink", neutral_hue="sky")) as demo:
138
  vector_db = gr.State()
139
  qa_chain = gr.State()
140
+ gr.HTML("<center><h1>RAG Text Document Chatbot</h1><center>")
141
+ gr.Markdown("""<b>Query your text documents!</b> This AI agent performs retrieval augmented generation (RAG) on TXT documents.
142
  <b>Please do not upload confidential documents.</b>
143
  """)
144
 
145
  with gr.Row():
146
  with gr.Column(scale=86):
147
+ gr.Markdown("<b>Step 1 - Upload Text Files and Initialize RAG pipeline</b>")
148
  with gr.Row():
149
  document = gr.Files(height=300, file_count="multiple",
150
+ file_types=["txt"], interactive=True,
151
+ label="Upload TXT documents")
152
  with gr.Row():
153
+ db_btn = gr.Button("Create text database")
154
  with gr.Row():
155
  db_progress = gr.Textbox(value="Not initialized", show_label=False)
156
 
 
182
  with gr.Row():
183
  doc_source = gr.Textbox(label=f"Reference {i}", lines=2,
184
  container=True, scale=20)
185
+ source_page = gr.Number(label="Line Range", scale=1, visible=False)
186
  with gr.Row():
187
  msg = gr.Textbox(placeholder="Ask a question", container=True)
188
  with gr.Row():