Files changed (1)
  1. app.py +177 -268
app.py CHANGED
@@ -2,116 +2,79 @@ import os
 import gradio as gr
 from huggingface_hub import InferenceClient
 from cryptography.fernet import Fernet
-
-# --- LangChain / RAG Imports ---
 from langchain_community.vectorstores import FAISS
-from langchain.chains import ConversationalRetrievalChain
-from langchain.memory import ConversationSummaryMemory  # ConversationBufferMemory
-from langchain_huggingface import HuggingFaceEmbeddings, HuggingFaceEndpoint

 def load_decrypted_preprompt(file_path="pre_prompt.enc"):
     """
-    Load and decrypt the pre-prompt from the encrypted file using the key
-    stored in the environment variable 'ENCRYPTION_KEY'.
     """
-    # Retrieve the encryption key from the environment
-    key_str = os.getenv("KEY", "")
-    if not key_str:
-        raise ValueError("Missing ENCRYPTION_KEY environment variable!")
-    key = key_str.encode()  # Key must be in bytes
-
-    fernet = Fernet(key)
-
-    # Read the encrypted pre-prompt
-    with open(file_path, "rb") as file:
-        encrypted_text = file.read()
-
-    # Decrypt and decode the text
-    decrypted_text = fernet.decrypt(encrypted_text)
-    return decrypted_text.decode("utf-8")

-# Instead of hardcoding, load the pre-prompt dynamically.
 PRE_PROMPT = load_decrypted_preprompt()

-# Default parameters for the QA chain
 DEFAULT_TEMPERATURE = 0.7
-DEFAULT_MAX_TOKENS = 1024
-DEFAULT_TOP_K = 10
 DEFAULT_TOP_P = 0.95

 def load_vector_db(index_path="faiss_index", model_name="sentence-transformers/all-MiniLM-L6-v2"):
-    """
-    Load the FAISS vector database from disk, allowing dangerous deserialization.
-    """
-    embeddings = HuggingFaceEmbeddings(model_name=model_name)
-    vector_db = FAISS.load_local(
-        index_path,
-        embeddings,
-        allow_dangerous_deserialization=True  # Only set this to True if you trust your data source!
-    )
-    return vector_db

-def initialize_qa_chain(temperature, max_tokens, top_k, vector_db):
     """
-    Initialize the retrieval-augmented QA chain using the pre-built vector database.
     """
-    if vector_db is None:
-        return None

-    HF_TOKEN = os.getenv("AMAbot_r", "")  # use for publishing
-    if not HF_TOKEN:
-        raise ValueError("Missing HF_TOKEN environment variable!")
-
-    llm = HuggingFaceEndpoint(
-        # repo_id="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
-        # repo_id="Qwen/Qwen2.5-1.5B-Instruct",
-        repo_id="google/gemma-2b-it",
-        huggingfacehub_api_token=HF_TOKEN,  # Only needed if the model endpoint requires authentication
-        temperature=temperature,
-        max_new_tokens=max_tokens,
-        top_k=top_k,
-        task="text-generation"
-    )
-
-    memory = ConversationSummaryMemory(
-        llm=llm,
-        max_token_limit=500,  # Adjust this to control the summary size
-        memory_key="chat_history",
-        return_messages=True
-    )

-    retriever = vector_db.as_retriever()
-    qa_chain = ConversationalRetrievalChain.from_llm(
-        llm,
-        retriever=retriever,
-        chain_type="stuff",
-        memory=memory,
-        return_source_documents=False,  # Do not return source documents
-        verbose=False,
-    )
-    return qa_chain
-
-def format_chat_history(history):
-    """
-    Format chat history (a list of dictionaries) into a list of strings for the QA chain.
-    Each entry is prefixed with "User:" or "Assistant:" accordingly.
-    """
-    formatted = []
-    for message in history:
-        if message["role"] == "user":
-            formatted.append(f"User: {message['content']}")
-        elif message["role"] == "assistant":
-            formatted.append(f"Assistant: {message['content']}")
-    return formatted

 def update_chat(message, history):
-    """
-    Append the user's message to the chat history and clear the input box.
-    Returns:
-    - Updated chat history (for the Chatbot)
-    - The user message (to be used as input for the next function)
-    - An empty string to clear the textbox.
-    """
     if history is None:
         history = []
     history = history.copy()
@@ -119,161 +82,91 @@ def update_chat(message, history):
     return history, message, ""

 def get_assistant_response(message, history, max_tokens, temperature, top_p, qa_chain_state_dict):
-    qa_chain = qa_chain_state_dict.get("qa_chain")
-
-    if qa_chain is not None:
-        # Format chat history to the plain-text format expected by the QA chain.
-        formatted_history = format_chat_history(history)

-        # Update the pre-prompt to encourage speculative responses.
-        speculative_pre_prompt = PRE_PROMPT + "\nIf you're not completely sure, please provide your best guess and mention that it is speculative."
-        combined_question = speculative_pre_prompt + "\n" + message

-        # Try retrieving an answer via the QA chain.
-        response = qa_chain.invoke({"question": combined_question, "chat_history": formatted_history})
-        answer = response.get("answer", "").strip()

-        # If no answer is returned, try the fallback plain chat mode with adjusted parameters.
-        if not answer:
-            # Increase temperature and optionally max_tokens for fallback.
-            increased_temperature = min(temperature + 0.2, 1.0)  # Cap temperature at 1.0
-            increased_max_tokens = max_tokens + 128  # Increase max tokens for a longer response if needed
-
-            speculative_prompt = speculative_pre_prompt + "\n" + message
-            messages = [{"role": "system", "content": speculative_prompt}] + history
-            response = ""
-            result = client.chat_completion(
-                messages,
-                max_tokens=increased_max_tokens,
-                stream=False,
-                temperature=increased_temperature,
-                top_p=top_p,
-            )
-            for token_message in result:
-                token = token_message.choices[0].delta.content
-                response += token
-            answer = response.strip()
-
-        # Final fallback if still empty.
-        if not answer:
-            answer = ("I'm sorry, I couldn't retrieve a clear answer. "
-                      "However, based on the available context, here is my best guess: "
-                      "[speculative answer].")

-        history.append({"role": "assistant", "content": answer})
-        return history, {"qa_chain": qa_chain}
-
-    # Fallback: Plain Chat Mode using the InferenceClient when no QA chain is available.
-    messages = [{"role": "system", "content": PRE_PROMPT}] + history
-    response = ""
-    result = client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=False,
-        temperature=temperature,
-        top_p=top_p,
-    )
-    # for token_message in result:
-    #     token = token_message.choices[0].delta.content
-    #     response += token
-
-    response = result.choices[0].message.content.strip()

-    response = response.strip()
-    if not response:
-        response = ("I'm sorry, I couldn't generate a response. Please try asking in a different way. "
-                    "Alternatively, consider contacting Christopher directly: https://gcmarais.com/contact/")
-
-    history.append({"role": "assistant", "content": response})
-    return history, {"qa_chain": qa_chain}


-HF_TOKEN = os.getenv("AMAbot_r", "")  # use for publishing
 if not HF_TOKEN:
-    raise ValueError("Missing HF_TOKEN environment variable!")
-# Global InferenceClient for plain chat (fallback)
-client = InferenceClient(
-    # "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
-    # "Qwen/Qwen2.5-1.5B-Instruct",
-    "google/gemma-2b-it",
-    token=HF_TOKEN)
-
-# --- Auto-load vector database and initialize QA chain at startup ---
-try:
-    vector_db = load_vector_db("faiss_index")
-    db_status_msg = "Vector DB loaded successfully."
-except Exception as e:
-    vector_db = None
-    db_status_msg = f"Failed to load Vector DB: {e}"

-if vector_db:
-    qa_chain = initialize_qa_chain(DEFAULT_TEMPERATURE, DEFAULT_MAX_TOKENS, DEFAULT_TOP_K, vector_db)
-else:
-    qa_chain = None
-
-qa_chain_state_initial = {"qa_chain": qa_chain}
-
-# New function to immediately send an example query:
-def send_example(example_text, history, max_tokens, temperature, top_p, qa_chain_state):
-    if history is None:
-        history = []
-    # Simulate appending the user's message.
-    history, _, _ = update_chat(example_text, history)
-    # Get the assistant's response.
-    history, qa_chain_state = get_assistant_response(example_text, history, max_tokens, temperature, top_p, qa_chain_state)
-    # Also hide the examples row.
-    return history, qa_chain_state, gr.update(visible=False)

-# ---------------------------
-# Gradio Interface Layout
-# ---------------------------
-# Create a theme instance using one of Gradio's prebuilt themes
-# Custom CSS that forces light mode regardless of browser settings.
-custom_css = """
-:root {
-    --primary-200: transparent !important;
-    color-scheme: light !important;
-    background-color: #fff !important;
-    color: #333 !important;
-}
-
-/* Override the background color for user messages in the Chatbot */
-#chatbot .message.user {
-    background-color: #ccc !important;  /* Grey background */
-    color: #222 !important;
-}
-.gradio-container footer {
-    display: none !important;
-}
-.gradio-container {
-    width: 100% !important;
-    max-width: none !important;
-    margin: 0;
-}
-.gradio-container .fillable {
-    width: 100% !important;
-    max-width: unset !important;
-    margin: 0;
-}
-.hf-chat-input textarea:focus {
-    outline: none !important;
-    box-shadow: none !important;
-    border-color: #c2c2c2 !important;
-}
-.hf-chat-input:focus {
-    outline: none !important;
-    box-shadow: none !important;
-    border-color: #c2c2c2 !important;  /* or use your preferred grey */
-}
-.block-container {
-    width: 100% !important;
-    max-width: none !important;
-}
-"""

-with gr.Blocks(fill_width=True, css=custom_css, theme=gr.themes.Default(primary_hue="sky")) as demo:
-    # Insert custom CSS for layout:
     gr.HTML("""
     <script>
     window.addEventListener("load", () => {
@@ -282,10 +175,42 @@ with gr.Blocks(fill_width=True, css=custom_css, theme=gr.themes.Default(primary_
     </script>
     <style>
     :root {
         color-scheme: light !important;
         background-color: #fff !important;
         color: #333 !important;
     }
     body .gradio-container .chatbot .hf-chat-input button .textbox textarea {
         background-color: #fff !important;
         color: #333 !important;
@@ -295,22 +220,19 @@ with gr.Blocks(fill_width=True, css=custom_css, theme=gr.themes.Default(primary_
         width: 100% !important;
         display: flex;
         flex-direction: row;
-        flex-wrap: wrap;  /* Will wrap to vertical if there's not enough space */
-        justify-content: center;  /* or flex-start, depending on your layout preference */
-        gap: 10px;  /* optional: add spacing between buttons */
     }
-
-    /* Container for the input box and embedded send button */
     .input-container {
         position: relative;
         width: 100%;
     }
-    /* Style for the input text to mimic Hugging Face Chat UI */
     .hf-chat-input {
         background-color: #f9f9f9;
         border: 1px solid #e0e0e0;
         border-radius: 20px;
-        padding: 10px 50px 10px 20px;  /* extra right padding to make room for the send button */
         font-size: 16px;
         width: 100%;
         box-sizing: border-box;
@@ -320,36 +242,33 @@ with gr.Blocks(fill_width=True, css=custom_css, theme=gr.themes.Default(primary_
         outline: none;
         border-color: #c2c2c2;
     }
-
-    /* Style for the embedded send button */
     .send-button {
         position: absolute;
-        right: 10px;  /* adjust as needed */
         top: 50%;
         transform: translateY(-50%);
-        width: 15px !important;  /* desired width */
-        height: 30px !important;  /* desired height */
         padding: 0;
         background: #fff;
         border: none;
         border-radius: 50%;
         cursor: pointer;
         transition: background-color 0.2s ease;
-        display: flex;  /* use flexbox for centering */
         align-items: center;
         justify-content: center;
-        font-size: 16px;  /* ensure consistent text size */
         line-height: 1;
     }
     .send-button:hover,
     .send-button:focus,
     .send-button:active {
         background-color: #f0f0f0;
-        outline: none;  /* remove focus outline */
         top: 50% !important;
         transform: translateY(-50%) !important;
     }
-    /* Overall input row styling */
     .input-row {
         display: flex;
         align-items: center;
@@ -359,28 +278,21 @@ with gr.Blocks(fill_width=True, css=custom_css, theme=gr.themes.Default(primary_
     </style>
     """)

-    # Keep the QA chain state in Gradio
     qa_chain_state = gr.State(value=qa_chain_state_initial)
-    # Hidden state to temporarily hold the user message for processing
     user_message_state = gr.State()

-    # Chat window using dictionary message format; initially hidden
     chatbot = gr.Chatbot(label="AMAbot", show_label=True, elem_id="chatbot", height=250, type="messages", visible=False)

-    # ---------------------------
-    # Example Inputs Row (clickable examples)
-    # ---------------------------
     with gr.Row(elem_classes="example-row", visible=True) as examples_container:
         ex1 = gr.Button("Who?")
         ex2 = gr.Button("Where?")
         ex3 = gr.Button("What?")

-    # Immediately show the chatbot when an example button is clicked (non-blocking)
     ex1.click(lambda: gr.update(visible=True), None, chatbot, queue=False)
     ex2.click(lambda: gr.update(visible=True), None, chatbot, queue=False)
     ex3.click(lambda: gr.update(visible=True), None, chatbot, queue=False)

-    # Input row: Embed the send button inside the text input box container.
     with gr.Row(elem_classes="input-row"):
         with gr.Column(elem_classes="input-container"):
             user_input = gr.Textbox(
@@ -391,18 +303,16 @@ with gr.Blocks(fill_width=True, css=custom_css, theme=gr.themes.Default(primary_
             )
             send_btn = gr.Button("❯❯", elem_classes="send-button")

-    # Hidden inputs for fixed parameters
     max_tokens_input = gr.Number(value=DEFAULT_MAX_TOKENS, visible=False)
     temperature_input = gr.Number(value=DEFAULT_TEMPERATURE, visible=False)
     top_p_input = gr.Number(value=DEFAULT_TOP_P, visible=False)

-    # Immediately show the chatbot when the send button is clicked or Enter is pressed
     user_input.submit(lambda: gr.update(visible=True), None, chatbot, queue=False)
     send_btn.click(lambda: gr.update(visible=True), None, chatbot, queue=False)

-    # ---------------------------
-    # Bind events for manual text submission.
-    # ---------------------------
     user_input.submit(
         update_chat,
         inputs=[user_input, chatbot],
@@ -413,6 +323,7 @@ with gr.Blocks(fill_width=True, css=custom_css, theme=gr.themes.Default(primary_
         outputs=[chatbot, qa_chain_state]
     )

     send_btn.click(
         update_chat,
         inputs=[user_input, chatbot],
@@ -423,9 +334,7 @@ with gr.Blocks(fill_width=True, css=custom_css, theme=gr.themes.Default(primary_
         outputs=[chatbot, qa_chain_state]
     )

-    # ---------------------------
-    # Bind events for example buttons.
-    # ---------------------------
     ex1.click(
         lambda history: update_chat("Who is Christopher?", history)[:2],
         inputs=[chatbot],
@@ -447,7 +356,7 @@ with gr.Blocks(fill_width=True, css=custom_css, theme=gr.themes.Default(primary_
     )

     ex3.click(
-        lambda history: update_chat("What degrees does Christopher have, and what job titles has he held?", history)[:2],
         inputs=[chatbot],
         outputs=[chatbot, user_message_state]
     ).then(
@@ -457,4 +366,4 @@ with gr.Blocks(fill_width=True, css=custom_css, theme=gr.themes.Default(primary_
     )

 if __name__ == "__main__":
-    demo.queue().launch(show_api=False)

 import gradio as gr
 from huggingface_hub import InferenceClient
 from cryptography.fernet import Fernet
+
+# --- LangChain / RAG Imports ---
 from langchain_community.vectorstores import FAISS
+from langchain.prompts import PromptTemplate
+from langchain_huggingface import HuggingFaceEmbeddings
+
+# --- Core Functions ---

 def load_decrypted_preprompt(file_path="pre_prompt.enc"):
     """
+    Load and decrypt the pre-prompt from the encrypted file using the key
+    stored in the environment variable 'KEY'.
     """
+    try:
+        key_str = os.getenv("KEY", "")
+        if not key_str:
+            print("Warning: KEY environment variable not set, using default pre-prompt")
+            return "You are AMAbot, a helpful assistant that answers questions about Christopher."
+        key = key_str.encode()
+        fernet = Fernet(key)
+        with open(file_path, "rb") as file:
+            encrypted_text = file.read()
+        decrypted_text = fernet.decrypt(encrypted_text)
+        return decrypted_text.decode("utf-8")
+    except Exception as e:
+        print(f"Error loading pre-prompt: {e}, using default")
+        return "You are AMAbot, a helpful assistant that answers questions about Christopher."
 
 PRE_PROMPT = load_decrypted_preprompt()

 DEFAULT_TEMPERATURE = 0.7
+DEFAULT_MAX_TOKENS = 512
+DEFAULT_TOP_K = 50
 DEFAULT_TOP_P = 0.95

+# Model served via the Hugging Face Inference API
+MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
+
 def load_vector_db(index_path="faiss_index", model_name="sentence-transformers/all-MiniLM-L6-v2"):
+    """Load the FAISS vector database from disk."""
+    try:
+        embeddings = HuggingFaceEmbeddings(model_name=model_name)
+        vector_db = FAISS.load_local(
+            index_path,
+            embeddings,
+            allow_dangerous_deserialization=True
+        )
+        print(f"Successfully loaded vector database from {index_path}")
+        return vector_db
+    except Exception as e:
+        print(f"Failed to load vector database: {e}")
+        return None
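For context, the `faiss_index` directory that `load_vector_db` reads is assumed to be built offline along these lines (a sketch; the real source documents and chunking are not part of this diff):

    from langchain_community.vectorstores import FAISS
    from langchain_huggingface import HuggingFaceEmbeddings

    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    texts = ["Christopher is ...", "..."]  # placeholder source passages
    FAISS.from_texts(texts, embeddings).save_local("faiss_index")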

+def create_qa_prompt():
     """
+    Create a prompt template for QA, formatted for Zephyr/Mistral models.
+    This is the specific prompt format Zephyr was trained on.
     """
+    template = """<|system|>
+You are a helpful assistant that answers questions using the context provided.
+If you don't know the answer based on the context, just say that you don't know. Don't try to make up an answer.</s>
+<|user|>
+Context:
+{context}
+
+Question: {question}</s>
+<|assistant|>
+Helpful Answer:"""
+
+    return PromptTemplate(template=template, input_variables=["context", "question"])
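A quick check of what this template renders to, with illustrative values (not from the diff):

    prompt = create_qa_prompt().format(
        context="Christopher studied ...",
        question="Who is Christopher?",
    )
    # `prompt` starts with "<|system|>" and ends with "Helpful Answer:",
    # ready to be sent as a single user message to chat_completion().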

 def update_chat(message, history):
+    """Append the user's message to the chat history and clear the input box."""
     if history is None:
         history = []
     history = history.copy()

     return history, message, ""

 def get_assistant_response(message, history, max_tokens, temperature, top_p, qa_chain_state_dict):
+    """
+    Generate the assistant response by running the RAG pipeline manually
+    and calling the chat_completion endpoint.
+    """
+    vector_db = qa_chain_state_dict.get("vector_db")
+    answer = "I apologize, but I'm having trouble accessing my knowledge base right now."
+
+    if not vector_db:
+        print("Error: Vector DB is not available.")
+        history.append({"role": "assistant", "content": answer})
+        return history, qa_chain_state_dict
+
+    try:
+        # 1. Retrieve relevant documents from the vector store
+        retriever = vector_db.as_retriever(search_kwargs={"k": 3})
+        retrieved_docs = retriever.invoke(message)

+        # 2. Format the context for the prompt
+        context = "\n\n".join([doc.page_content for doc in retrieved_docs])

+        # 3. Build the prompt using the Zephyr template
+        qa_prompt_template = create_qa_prompt()
+        formatted_prompt = qa_prompt_template.format(context=context, question=message)
+
+        # 4. Prepare the message payload for the conversational API
+        messages = [
+            {
+                "role": "user",
+                "content": formatted_prompt,
+            }
+        ]

+        # 5. Call the chat completion endpoint
+        print("Attempting to call chat_completion API...")
+        client = InferenceClient(MODEL_NAME, token=os.getenv("HF_TOKEN", ""))

+        response = client.chat_completion(
+            messages=messages,
+            max_tokens=max_tokens,
+            temperature=temperature if temperature > 0 else 0.1,  # Temperature must be > 0 for chat
+            top_p=top_p,
+            stream=False
+        )
+
+        # 6. Extract the answer
+        if response.choices and response.choices[0].message:
+            answer = response.choices[0].message.content.strip()
+            print(f"API call successful, answer length: {len(answer)}")
+        else:
+            print("API returned an empty response.")
+
+    except Exception as e:
+        print(f"An error occurred in get_assistant_response: {type(e).__name__} - {repr(e)}")
+        answer = f"I'm experiencing technical difficulties. Please try again. (Error: {str(e)[:100]})"

+    history.append({"role": "assistant", "content": answer})
+    return history, qa_chain_state_dict
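The function can be smoke-tested outside Gradio (assuming `faiss_index/` and `HF_TOKEN` are in place; the parameter values below are the module defaults):

    history = [{"role": "user", "content": "Who is Christopher?"}]
    state = {"vector_db": load_vector_db("faiss_index")}
    history, state = get_assistant_response(
        "Who is Christopher?", history,
        DEFAULT_MAX_TOKENS, DEFAULT_TEMPERATURE, DEFAULT_TOP_P, state,
    )
    print(history[-1]["content"])  # the assistant's answer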

+# --- Initialize Components ---
+HF_TOKEN = os.getenv("HF_TOKEN", "")
 if not HF_TOKEN:
+    print("Warning: HF_TOKEN not set in environment variables!")

+# Load vector database
+vector_db = load_vector_db("faiss_index")

+# Prepare the initial state dictionary with the vector_db
+qa_chain_state_initial = {"vector_db": vector_db}

+# Test the vector DB setup
+if vector_db:
+    print("Testing vector database...")
+    try:
+        test_retriever = vector_db.as_retriever(search_kwargs={"k": 1})
+        test_docs = test_retriever.invoke("test query")
+        print("Vector DB test successful, can retrieve documents")
+    except Exception as e:
+        print(f"Vector DB test failed: {e}")

+# ------------------------------------------------------------------
+# Gradio Interface Layout
+# ------------------------------------------------------------------
+with gr.Blocks(fill_width=True, theme=gr.themes.Default(primary_hue="sky")) as demo:
+    # This HTML block contains all the CSS and JS for the layout
     gr.HTML("""
     <script>
     window.addEventListener("load", () => {

     </script>
     <style>
     :root {
+        --primary-200: transparent !important;
         color-scheme: light !important;
         background-color: #fff !important;
         color: #333 !important;
     }
+    #chatbot .message.user {
+        background-color: #ccc !important;
+        color: #222 !important;
+    }
+    .gradio-container footer {
+        display: none !important;
+    }
+    .gradio-container {
+        width: 100% !important;
+        max-width: none !important;
+        margin: 0;
+    }
+    .gradio-container .fillable {
+        width: 100% !important;
+        max-width: unset !important;
+        margin: 0;
+    }
+    .hf-chat-input textarea:focus {
+        outline: none !important;
+        box-shadow: none !important;
+        border-color: #c2c2c2 !important;
+    }
+    .hf-chat-input:focus {
+        outline: none !important;
+        box-shadow: none !important;
+        border-color: #c2c2c2 !important;
+    }
+    .block-container {
+        width: 100% !important;
+        max-width: none !important;
+    }
     body .gradio-container .chatbot .hf-chat-input button .textbox textarea {
         background-color: #fff !important;
         color: #333 !important;
         width: 100% !important;
         display: flex;
         flex-direction: row;
+        flex-wrap: wrap;
+        justify-content: center;
+        gap: 10px;
     }
     .input-container {
         position: relative;
         width: 100%;
     }
     .hf-chat-input {
         background-color: #f9f9f9;
         border: 1px solid #e0e0e0;
         border-radius: 20px;
+        padding: 10px 50px 10px 20px;
         font-size: 16px;
         width: 100%;
         box-sizing: border-box;

         outline: none;
         border-color: #c2c2c2;
     }
     .send-button {
         position: absolute;
+        right: 10px;
         top: 50%;
         transform: translateY(-50%);
+        width: 15px !important;
+        height: 30px !important;
         padding: 0;
         background: #fff;
         border: none;
         border-radius: 50%;
         cursor: pointer;
         transition: background-color 0.2s ease;
+        display: flex;
         align-items: center;
         justify-content: center;
+        font-size: 16px;
         line-height: 1;
     }
     .send-button:hover,
     .send-button:focus,
     .send-button:active {
         background-color: #f0f0f0;
+        outline: none;
         top: 50% !important;
         transform: translateY(-50%) !important;
     }
     .input-row {
         display: flex;
         align-items: center;

     </style>
     """)

+    # State management remains the same
     qa_chain_state = gr.State(value=qa_chain_state_initial)
     user_message_state = gr.State()

     chatbot = gr.Chatbot(label="AMAbot", show_label=True, elem_id="chatbot", height=250, type="messages", visible=False)

     with gr.Row(elem_classes="example-row", visible=True) as examples_container:
         ex1 = gr.Button("Who?")
         ex2 = gr.Button("Where?")
         ex3 = gr.Button("What?")

     ex1.click(lambda: gr.update(visible=True), None, chatbot, queue=False)
     ex2.click(lambda: gr.update(visible=True), None, chatbot, queue=False)
     ex3.click(lambda: gr.update(visible=True), None, chatbot, queue=False)

     with gr.Row(elem_classes="input-row"):
         with gr.Column(elem_classes="input-container"):
             user_input = gr.Textbox(

             )
             send_btn = gr.Button("❯❯", elem_classes="send-button")

+    # Hidden inputs for model parameters
     max_tokens_input = gr.Number(value=DEFAULT_MAX_TOKENS, visible=False)
     temperature_input = gr.Number(value=DEFAULT_TEMPERATURE, visible=False)
     top_p_input = gr.Number(value=DEFAULT_TOP_P, visible=False)

+    # --- Event Handlers (unchanged; they call the new functions directly) ---
     user_input.submit(lambda: gr.update(visible=True), None, chatbot, queue=False)
     send_btn.click(lambda: gr.update(visible=True), None, chatbot, queue=False)

+    # Submit action for text input
     user_input.submit(
         update_chat,
         inputs=[user_input, chatbot],

         outputs=[chatbot, qa_chain_state]
     )

+    # Click action for send button
     send_btn.click(
         update_chat,
         inputs=[user_input, chatbot],

         outputs=[chatbot, qa_chain_state]
     )

+    # Click actions for example buttons
     ex1.click(
         lambda history: update_chat("Who is Christopher?", history)[:2],
         inputs=[chatbot],

     )

     ex3.click(
+        lambda history: update_chat("What degrees does Christopher have, and what technical experience does he have?", history)[:2],
         inputs=[chatbot],
         outputs=[chatbot, user_message_state]
     ).then(

     )

 if __name__ == "__main__":
+    demo.queue().launch(show_api=False, share=True)
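A hypothetical local smoke test of the full startup path (placeholder values; expects `pre_prompt.enc` and `faiss_index/` in the working directory — on Spaces the secrets are set in the Space settings instead):

    import os, subprocess

    env = dict(os.environ)
    env.setdefault("KEY", "<fernet-key>")      # decrypts pre_prompt.enc
    env.setdefault("HF_TOKEN", "<api-token>")  # authenticates the Inference API
    subprocess.run(["python", "app.py"], env=env, check=True)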