menikev committed on
Commit
befecdb
·
verified ·
1 Parent(s): 207f66e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +102 -49
app.py CHANGED
@@ -62,13 +62,19 @@ def vector_database(chunks):
62
  Creates a FAISS vector database from the document chunks using a
63
  Hugging Face embeddings model.
64
  """
65
- # Using a sentence-transformer model from Hugging Face for embeddings
66
  embedding_model = HuggingFaceInferenceAPIEmbeddings(
67
  api_key=os.environ["HUGGINGFACEHUB_API_TOKEN"],
68
  model_name="sentence-transformers/all-MiniLM-L6-v2"
69
  )
70
- vectordb = FAISS.from_documents(chunks, embedding_model)
71
- return vectordb
 
 
 
 
 
 
72
 
73
  ## Retriever
74
  def retriever(file_path):
@@ -81,7 +87,9 @@ def retriever(file_path):
81
  # Add a check to ensure chunks are not empty
82
  if not chunks:
83
  raise ValueError("The uploaded document could not be processed. Please try another file.")
84
-
 
 
85
  vectordb = vector_database(chunks)
86
  retriever = vectordb.as_retriever()
87
  return retriever
@@ -91,58 +99,103 @@ def retriever_qa(file, query):
91
  """
92
  Sets up a RetrievalQA chain to answer questions based on the document.
93
  """
94
- # Use the file path from the Gradio file object
95
- file_path = file.name if file else None
96
-
97
  # Check if a file was uploaded
98
- if not file_path:
99
  return "Please upload a valid PDF file before asking a question."
 
 
 
 
100
 
101
- llm = get_llm()
 
 
102
  try:
 
103
  retriever_obj = retriever(file_path)
104
- except ValueError as e:
105
- return str(e)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
 
107
- # Custom prompt to act as a conversational legal advisor
108
- prompt_template = f"""
109
- You are a friendly and professional legal advisor. Your goal is to provide concise and contextual legal advice based on the provided document.
110
- Do not give verbatim answers. Instead, analyze the relevant text and respond in a conversational manner.
111
-
112
- Context:
113
- {file}
114
-
115
- Question: {query}
116
-
117
- Legal Advisor's Answer:
118
  """
119
-
120
- qa = RetrievalQA.from_chain_type(
121
- llm=llm,
122
- chain_type="stuff",
123
- retriever=retriever_obj,
124
- return_source_documents=True,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
  )
126
-
127
- # Using a custom prompt template for the LLM
128
- response = qa.invoke({"query": prompt_template})
129
-
130
- # Extract the contextual response from the full LLM output
131
- result_text = response['result']
132
- return result_text
133
-
134
- # Create Gradio interface
135
- rag_application = gr.Interface(
136
- fn=retriever_qa,
137
- allow_flagging="never",
138
- inputs=[
139
- gr.File(label="Upload PDF File", file_count="single", file_types=['.pdf'], type="filepath"),
140
- gr.Textbox(label="Input Query", lines=2, placeholder="Type your question here...")
141
- ],
142
- outputs=gr.Textbox(label="Legal Advisor's Response"),
143
- title="Nigerian Constitution Legal Advisor Chatbot",
144
- description="Upload the Nigerian Constitution and ask me questions about it. I will provide a conversational and contextual response."
145
- )
146
 
147
  # Launch the app
148
- rag_application.launch(share=True)
 
 
 
 
 
 
 
62
  Creates a FAISS vector database from the document chunks using a
63
  Hugging Face embeddings model.
64
  """
65
+ # Embed chunks through the Hugging Face Inference API (constructor arguments unchanged: api_key, model_name)
66
  embedding_model = HuggingFaceInferenceAPIEmbeddings(
67
  api_key=os.environ["HUGGINGFACEHUB_API_TOKEN"],
68
  model_name="sentence-transformers/all-MiniLM-L6-v2"
69
  )
70
+
71
+ # Add error handling for embedding creation
72
+ try:
73
+ vectordb = FAISS.from_documents(chunks, embedding_model)
74
+ return vectordb
75
+ except Exception as e:
76
+ print(f"Error creating vector database: {e}")
77
+ raise ValueError(f"Failed to create embeddings: {e}")
78
 
79
  ## Retriever
80
  def retriever(file_path):
 
87
  # Add a check to ensure chunks are not empty
88
  if not chunks:
89
  raise ValueError("The uploaded document could not be processed. Please try another file.")
90
+
91
+ print(f"Created {len(chunks)} chunks from the document")
92
+
93
  vectordb = vector_database(chunks)
94
  retriever = vectordb.as_retriever()
95
  return retriever
 
99
  """
100
  Sets up a RetrievalQA chain to answer questions based on the document.
101
  """
 
 
 
102
  # Check if a file was uploaded
103
+ if not file:
104
  return "Please upload a valid PDF file before asking a question."
105
+
106
+ # Check if query is provided
107
+ if not query or query.strip() == "":
108
+ return "Please enter a question to get started."
109
 
110
+ # Use the file path from the Gradio file object
111
+ file_path = file.name if hasattr(file, 'name') else str(file)
112
+
113
  try:
114
+ llm = get_llm()
115
  retriever_obj = retriever(file_path)
116
+
117
+ # Simplified prompt - let the RetrievalQA chain handle the context properly
118
+ qa = RetrievalQA.from_chain_type(
119
+ llm=llm,
120
+ chain_type="stuff",
121
+ retriever=retriever_obj,
122
+ return_source_documents=True,
123
+ )
124
+
125
+ # Create a proper prompt for legal advice
126
+ legal_prompt = f"""Based on the document content, please provide professional legal guidance for the following question.
127
+ Be conversational, clear, and cite relevant sections when possible.
128
+
129
+ Question: {query}
130
+
131
+ Please provide a helpful and accurate response based on the document content."""
132
+
133
+ response = qa.invoke({"query": legal_prompt})
134
+
135
+ # Extract the result
136
+ result_text = response.get('result', 'No response generated.')
137
+
138
+ # Clean up the response if needed
139
+ if result_text.startswith("Legal Advisor's Answer:"):
140
+ result_text = result_text.replace("Legal Advisor's Answer:", "").strip()
141
+
142
+ return result_text
143
+
144
+ except Exception as e:
145
+ error_msg = str(e)
146
+ if "API token" in error_msg or "authentication" in error_msg.lower():
147
+ return "Error: Please check your Hugging Face API token configuration."
148
+ elif "embedding" in error_msg.lower():
149
+ return "Error: Failed to create document embeddings. Please try uploading a different PDF file."
150
+ else:
151
+ return f"Error processing your request: {error_msg}"
152
 
153
+ # Build the Gradio interface inside a factory function so importing this module does not construct the UI
154
+ def create_interface():
 
 
 
 
 
 
 
 
 
155
  """
156
+ Creates and returns the Gradio interface
157
+ """
158
+ interface = gr.Interface(
159
+ fn=retriever_qa,
160
+ allow_flagging="never",
161
+ inputs=[
162
+ gr.File(
163
+ label="Upload PDF File",
164
+ file_count="single",
165
+ file_types=['.pdf']
166
+ ),
167
+ gr.Textbox(
168
+ label="Input Query",
169
+ lines=3,
170
+ placeholder="Type your legal question here...",
171
+ info="Ask questions about the uploaded document"
172
+ )
173
+ ],
174
+ outputs=gr.Textbox(
175
+ label="Legal Advisor's Response",
176
+ lines=10,
177
+ max_lines=20
178
+ ),
179
+ title="Nigerian Constitution Legal Advisor Chatbot",
180
+ description="""
181
+ Upload a PDF document (like the Nigerian Constitution) and ask legal questions about it.
182
+ The AI will analyze the document and provide contextual legal guidance.
183
+
184
+ **Note:** Make sure to set your Hugging Face API token in the environment variables.
185
+ """,
186
+ examples=[
187
+ [None, "What are the fundamental rights guaranteed by this constitution?"],
188
+ [None, "What is the process for constitutional amendments?"],
189
+ [None, "What are the powers of the federal government?"]
190
+ ]
191
  )
192
+ return interface
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
193
 
194
  # Launch the app
195
+ if __name__ == "__main__":
196
+ # Check if API token is set
197
+ if not os.environ.get("HUGGINGFACEHUB_API_TOKEN") or os.environ.get("HUGGINGFACEHUB_API_TOKEN") == "hf_YOUR_HUGGINGFACE_TOKEN":
198
+ print("WARNING: Please set your actual Hugging Face API token in the HUGGINGFACEHUB_API_TOKEN environment variable")
199
+
200
+ rag_application = create_interface()
201
+ rag_application.launch(share=True)