dnzblgn committed on
Commit
6faf44c
·
verified ·
1 Parent(s): c878a32

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -59
app.py CHANGED
@@ -1,56 +1,39 @@
1
  import gradio as gr
2
  import os
3
  import time
4
- import PyPDF2
5
  from langchain.text_splitter import RecursiveCharacterTextSplitter
6
  from langchain_community.vectorstores import FAISS
7
  from langchain.chains import ConversationalRetrievalChain
8
  from langchain_community.embeddings import HuggingFaceEmbeddings
9
  from langchain.memory import ConversationBufferMemory
10
  from langchain_community.llms import HuggingFaceEndpoint
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
- def read_file(file_path):
13
- try:
14
- if file_path.endswith(".txt"):
15
- with open(file_path, "r", encoding="utf-8") as f:
16
- content = f.read()
17
- elif file_path.endswith(".pdf"):
18
- content = ""
19
- with open(file_path, "rb") as f:
20
- reader = PyPDF2.PdfReader(f)
21
- for page in reader.pages:
22
- content += page.extract_text() + "\n"
23
- else:
24
- return None, "Unsupported file format. Please upload a .txt or .pdf file."
25
-
26
- if not content.strip():
27
- return None, "File is empty. Please upload a valid document."
28
-
29
- return content, "Successfully processed the uploaded file! Ready for questions."
30
- except Exception as e:
31
- return None, f"Error reading file: {str(e)}"
32
-
33
- def create_db_from_text(text):
34
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=64)
35
- splits = text_splitter.create_documents([text])
36
-
37
- # Specify an explicit model for embeddings
38
  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
39
-
40
- vector_db = FAISS.from_documents(splits, embeddings)
41
- return vector_db
42
 
43
  def initialize_chatbot(vector_db):
44
  memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
45
  retriever = vector_db.as_retriever()
46
-
47
  llm = HuggingFaceEndpoint(
48
  repo_id="mistralai/Mistral-7B-Instruct-v0.2",
49
  huggingfacehub_api_token=os.environ.get("HUGGINGFACE_API_TOKEN"),
50
  temperature=0.5,
51
  max_new_tokens=256
52
  )
53
-
54
  qa_chain = ConversationalRetrievalChain.from_llm(
55
  llm=llm,
56
  retriever=retriever,
@@ -59,19 +42,16 @@ def initialize_chatbot(vector_db):
59
  )
60
  return qa_chain
61
 
62
- def process_and_initialize(file):
63
- if file is None:
64
  return None, None, "Please upload a file first."
65
 
66
  try:
67
- text, status_message = read_file(file)
68
- if text is None:
69
- return None, None, status_message
70
-
71
- db = create_db_from_text(text)
72
  qa = initialize_chatbot(db)
73
-
74
- return db, qa, status_message
75
  except Exception as e:
76
  return None, None, f"Processing error: {str(e)}"
77
 
@@ -80,10 +60,8 @@ def user_query_typing_effect(query, qa_chain, chatbot):
80
  try:
81
  response = qa_chain.invoke({"question": query, "chat_history": []})
82
  assistant_response = response["answer"]
83
-
84
  history.append({"role": "user", "content": query})
85
  history.append({"role": "assistant", "content": ""})
86
-
87
  for i in range(len(assistant_response)):
88
  history[-1]["content"] += assistant_response[i]
89
  yield history, ""
@@ -112,28 +90,24 @@ def demo():
112
  background-color: #FFF5E1;
113
  }
114
  """
115
-
116
  with gr.Blocks(css=custom_css) as app:
117
- vector_db = gr.State(None)
118
- qa_chain = gr.State(None)
119
-
120
- gr.Markdown("### 🌟 **Document-Based Chatbot** 🌟")
121
  gr.Markdown("#### Upload your document and ask questions interactively!")
122
-
123
  with gr.Row():
124
  with gr.Column(scale=1):
125
- txt_file = gr.File(
126
- label="📁 Upload Document",
127
  file_types=[".txt", ".pdf"],
128
- type="filepath"
129
  )
130
- analyze_btn = gr.Button("🚀 Process Document")
131
  status = gr.Textbox(
132
  label="📊 Status",
133
  placeholder="Status updates will appear here...",
134
  interactive=False
135
  )
136
-
137
  with gr.Column(scale=3):
138
  chatbot = gr.Chatbot(
139
  label="🤖 Chat with your data",
@@ -151,29 +125,25 @@ def demo():
151
  container=False
152
  )
153
  query_btn = gr.Button("Ask")
154
-
155
  analyze_btn.click(
156
  fn=process_and_initialize,
157
  inputs=[txt_file],
158
  outputs=[vector_db, qa_chain, status],
159
  show_progress="minimal"
160
  )
161
-
162
  query_btn.click(
163
  fn=user_query_typing_effect,
164
  inputs=[query_input, qa_chain, chatbot],
165
  outputs=[chatbot, query_input],
166
  show_progress="minimal"
167
  )
168
-
169
  query_input.submit(
170
  fn=user_query_typing_effect,
171
  inputs=[query_input, qa_chain, chatbot],
172
  outputs=[chatbot, query_input],
173
  show_progress="minimal"
174
  )
175
-
176
  app.launch()
177
 
178
  if __name__ == "__main__":
179
- demo()
 
1
  import gradio as gr
2
  import os
3
  import time
 
4
  from langchain.text_splitter import RecursiveCharacterTextSplitter
5
  from langchain_community.vectorstores import FAISS
6
  from langchain.chains import ConversationalRetrievalChain
7
  from langchain_community.embeddings import HuggingFaceEmbeddings
8
  from langchain.memory import ConversationBufferMemory
9
  from langchain_community.llms import HuggingFaceEndpoint
10
+ from langchain_community.document_loaders import PyPDFLoader
11
+
12
+ def load_doc(list_file_path):
13
+ loaders = [PyPDFLoader(x) for x in list_file_path]
14
+ pages = []
15
+ for loader in loaders:
16
+ pages.extend(loader.load())
17
+ text_splitter = RecursiveCharacterTextSplitter(
18
+ chunk_size=1024, chunk_overlap=64
19
+ )
20
+ doc_splits = text_splitter.split_documents(pages)
21
+ return doc_splits
22
 
23
+ def create_db(splits):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
25
+ vectordb = FAISS.from_documents(splits, embeddings)
26
+ return vectordb
 
27
 
28
  def initialize_chatbot(vector_db):
29
  memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
30
  retriever = vector_db.as_retriever()
 
31
  llm = HuggingFaceEndpoint(
32
  repo_id="mistralai/Mistral-7B-Instruct-v0.2",
33
  huggingfacehub_api_token=os.environ.get("HUGGINGFACE_API_TOKEN"),
34
  temperature=0.5,
35
  max_new_tokens=256
36
  )
 
37
  qa_chain = ConversationalRetrievalChain.from_llm(
38
  llm=llm,
39
  retriever=retriever,
 
42
  )
43
  return qa_chain
44
 
45
+ def process_and_initialize(files):
46
+ if not files:
47
  return None, None, "Please upload a file first."
48
 
49
  try:
50
+ list_file_path = [file.name for file in files if file is not None]
51
+ doc_splits = load_doc(list_file_path)
52
+ db = create_db(doc_splits)
 
 
53
  qa = initialize_chatbot(db)
54
+ return db, qa, "Database created! Ready for questions."
 
55
  except Exception as e:
56
  return None, None, f"Processing error: {str(e)}"
57
 
 
60
  try:
61
  response = qa_chain.invoke({"question": query, "chat_history": []})
62
  assistant_response = response["answer"]
 
63
  history.append({"role": "user", "content": query})
64
  history.append({"role": "assistant", "content": ""})
 
65
  for i in range(len(assistant_response)):
66
  history[-1]["content"] += assistant_response[i]
67
  yield history, ""
 
90
  background-color: #FFF5E1;
91
  }
92
  """
 
93
  with gr.Blocks(css=custom_css) as app:
94
+ vector_db = gr.State()
95
+ qa_chain = gr.State()
96
+ gr.Markdown("### 🌟 **PDF & TXT Chatbot** 🌟")
 
97
  gr.Markdown("#### Upload your document and ask questions interactively!")
 
98
  with gr.Row():
99
  with gr.Column(scale=1):
100
+ txt_file = gr.Files(
101
+ label="📁 Upload Documents",
102
  file_types=[".txt", ".pdf"],
103
+ type="file"
104
  )
105
+ analyze_btn = gr.Button("🚀 Process Documents")
106
  status = gr.Textbox(
107
  label="📊 Status",
108
  placeholder="Status updates will appear here...",
109
  interactive=False
110
  )
 
111
  with gr.Column(scale=3):
112
  chatbot = gr.Chatbot(
113
  label="🤖 Chat with your data",
 
125
  container=False
126
  )
127
  query_btn = gr.Button("Ask")
 
128
  analyze_btn.click(
129
  fn=process_and_initialize,
130
  inputs=[txt_file],
131
  outputs=[vector_db, qa_chain, status],
132
  show_progress="minimal"
133
  )
 
134
  query_btn.click(
135
  fn=user_query_typing_effect,
136
  inputs=[query_input, qa_chain, chatbot],
137
  outputs=[chatbot, query_input],
138
  show_progress="minimal"
139
  )
 
140
  query_input.submit(
141
  fn=user_query_typing_effect,
142
  inputs=[query_input, qa_chain, chatbot],
143
  outputs=[chatbot, query_input],
144
  show_progress="minimal"
145
  )
 
146
  app.launch()
147
 
148
  if __name__ == "__main__":
149
+ demo()