Crackershoot committed on
Commit
853a786
·
verified ·
1 Parent(s): 0526966

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +434 -176
app.py CHANGED
@@ -1,11 +1,9 @@
1
- # Import necessary libraries
2
  import logging
3
  import sys
4
  import os
5
- import time
6
- import json
7
- from datetime import datetime
8
 
 
9
  from agno.agent import Agent
10
  from agno.models.openai import OpenAIChat
11
  from agno.knowledge.embedder.openai import OpenAIEmbedder
@@ -13,32 +11,40 @@ from agno.tools.duckduckgo import DuckDuckGoTools
13
  from agno.knowledge.knowledge import Knowledge
14
  from agno.vectordb.lancedb import LanceDb, SearchType
15
 
 
16
  import gradio as gr
17
- import fitz
18
- from PIL import Image
19
- import io
20
- import requests
21
- import re
22
 
 
 
 
 
 
 
 
 
23
  logging.basicConfig(stream=sys.stdout, level=logging.INFO)
 
24
  logger = logging.getLogger(__name__)
25
 
 
26
  OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
 
27
  if not OPENAI_API_KEY:
28
  raise ValueError("Missing OPENAI_API_KEY")
29
 
30
- # =========================
31
- # Knowledge Base
32
- # =========================
33
  knowledge = Knowledge(
 
34
  vector_db=LanceDb(
35
- uri="tmp/lancedb",
36
- table_name="pdf_documents",
37
- search_type=SearchType.vector,
 
38
  embedder=OpenAIEmbedder(id="text-embedding-3-small"),
39
  )
40
  )
41
 
 
42
  pdf_urls = [
43
  "https://media.datacamp.com/cms/working-with-hugging-face.pdf",
44
  "https://media.datacamp.com/cms/ai-agents-cheat-sheet.pdf",
@@ -48,208 +54,460 @@ pdf_urls = [
48
  "https://media.datacamp.com/cms/python-basics-cheat-sheet-v4.pdf"
49
  ]
50
 
 
51
  def download_if_needed(url, filename):
 
52
  if not os.path.exists(filename):
53
  logger.info(f"Downloading {url}...")
 
54
  response = requests.get(url)
 
55
  with open(filename, "wb") as f:
 
56
  f.write(response.content)
 
57
 
 
 
58
  os.makedirs("pdf_cache", exist_ok=True)
59
 
 
60
  def add_pdfs_to_knowledge():
61
- contents = []
 
 
 
 
62
  for i, url in enumerate(pdf_urls):
 
63
  filename = f"pdf_cache/file_{i}.pdf"
64
- download_if_needed(url, filename)
65
- contents.append({"path": filename, "metadata": {"source": url}})
66
-
67
- if hasattr(knowledge, 'add_contents'):
68
- knowledge.add_contents(contents)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  else:
70
- for c in contents:
71
- knowledge.add_content(**c)
72
 
73
- if not os.path.exists("tmp/lancedb"):
74
- add_pdfs_to_knowledge()
75
 
76
- # =========================
77
- # Agent
78
- # =========================
79
  agent = Agent(
 
80
  model=OpenAIChat(id="gpt-4.1-mini", temperature=0.2),
 
81
  description="You are Dox a data expert!",
82
- instructions="""You are a data professional's assistant named Dox.
 
 
83
  Your primary goal is to answer questions about data, programming, cloud computing, AI/ML, and technology topics.
84
- 1. Prioritize knowledge base, then web search if needed.
85
- 2. Keep answers concise (300-500 words).
86
- 3. Always cite sources with links.
87
- 4. Never hallucinate.""",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
  knowledge=knowledge,
 
89
  add_datetime_to_context=True,
 
90
  add_location_to_context=True,
 
91
  search_knowledge=True,
 
92
  tools=[DuckDuckGoTools()],
 
93
  markdown=True
94
  )
95
 
96
- # =========================
97
- # Feedback Storage
98
- # =========================
99
- def save_feedback(question, response, feedback, comment=""):
100
- data = {
101
- "timestamp": str(datetime.now()),
102
- "question": question,
103
- "response": response,
104
- "feedback": feedback,
105
- "comment": comment
106
- }
107
- with open("feedback.jsonl", "a") as f:
108
- f.write(json.dumps(data) + "\n")
109
-
110
- # =========================
111
- # Agent Call
112
- # =========================
113
- def ask_agent(question, mode):
114
- try:
115
- if mode == "Knowledge":
116
- response = agent.run(question, use_knowledge=True, tools=[])
117
- elif mode == "Web":
118
- response = agent.run(question, use_knowledge=False)
119
- else:
120
- response = agent.run(question, use_knowledge=True)
121
-
122
- full = response.get_content_as_string()
123
 
 
 
 
 
 
 
 
 
124
  except Exception as e:
125
- return "❌ Error occurred", None
126
-
127
- matches = re.findall(r'https?://[^\s)]+\.pdf(?:\?[^\s)]*)?', full)
128
- link = matches[0] if matches else None
129
-
130
- full += "\n\n---\n**📋 Dox would appreciate your feedback! ⬇️**"
131
- return full, link
132
-
133
- # =========================
134
- # PDF Handling
135
- # =========================
136
- def display_pdf(url):
137
- if not url:
138
- return gr.update(value=None, visible=False), ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
  try:
140
- pdf_bytes = requests.get(url).content
 
 
141
  doc = fitz.open(stream=pdf_bytes, filetype="pdf")
142
- pix = doc[0].get_pixmap()
 
 
 
 
 
 
 
143
  img = Image.open(io.BytesIO(pix.tobytes("png")))
144
- return img, ""
145
- except:
146
- return None, "❌ Failed"
147
-
148
- # =========================
149
- # UI
150
- # =========================
151
- theme = gr.themes.Ocean()
152
-
153
- with gr.Blocks(theme=theme) as demo:
 
 
 
 
 
 
 
 
 
154
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
155
  gr.Markdown("# 🤖 Dox the Data Professional's Advisor 🤖")
156
-
157
- mode_selector = gr.Radio(
158
- ["Knowledge", "Web", "Hybrid"],
159
- value="Hybrid",
160
- label="📊 Source Mode"
161
- )
162
-
163
- chatbot = gr.Chatbot(height=450)
164
- question = gr.Textbox()
165
-
166
  with gr.Row():
167
- ask_btn = gr.Button("Submit 📤")
168
- regen_btn = gr.Button("🔄 Regenerate")
169
- clear_btn = gr.Button("🧹 Clear")
170
-
171
- thumbs_up = gr.Button("👍 Helpful")
172
- thumbs_down = gr.Button("👎 Not Helpful")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
173
 
174
- feedback_box = gr.Textbox(visible=False)
175
- feedback_status = gr.Markdown()
176
 
177
- link_state = gr.State()
178
- pdf_img = gr.Image()
 
 
 
 
 
 
179
 
180
- # =========================
181
- # Chat Logic (STREAMING)
182
- # =========================
183
- def chat_ui(msg, history, mode):
184
- if history is None:
185
- history = []
186
 
187
- history.append({"role": "user", "content": msg})
188
- history.append({"role": "assistant", "content": "🤔 Thinking..."})
 
 
 
 
 
 
 
189
 
190
- yield history, None, None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
191
 
192
- partial = ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
193
 
194
- try:
195
- if hasattr(agent, "run_stream"):
196
- for chunk in agent.run_stream(msg):
197
- partial += chunk
198
- history[-1]["content"] = partial
199
- yield history, None, None
200
- else:
201
- full, link = ask_agent(msg, mode)
202
- for c in full:
203
- partial += c
204
- history[-1]["content"] = partial
205
- time.sleep(0.002)
206
- yield history, None, None
207
-
208
- matches = re.findall(r'https?://[^\s)]+\.pdf', partial)
209
- link = matches[0] if matches else None
210
-
211
- yield history, link, None
212
-
213
- except:
214
- history[-1]["content"] = "❌ Error"
215
- yield history, None, None
216
-
217
- # =========================
218
- # Regenerate
219
- # =========================
220
- def regen(history):
221
- if len(history) < 2:
222
- return history, None, None
223
-
224
- last = history[-2]["content"]
225
- return chat_ui(last, history[:-1], "Hybrid")
226
-
227
- # =========================
228
- # Feedback
229
- # =========================
230
- def up(history):
231
- save_feedback(history[-2]["content"], history[-1]["content"], "up")
232
- return "✅ Thanks!"
233
-
234
- def down(text, history):
235
- save_feedback(history[-2]["content"], history[-1]["content"], "down", text)
236
- return "✅ Feedback saved"
237
-
238
- # =========================
239
- # Bindings
240
- # =========================
241
- ask_btn.click(chat_ui, [question, chatbot, mode_selector], [chatbot, link_state, pdf_img])
242
- question.submit(chat_ui, [question, chatbot, mode_selector], [chatbot, link_state, pdf_img])
243
-
244
- regen_btn.click(regen, chatbot, [chatbot, link_state, pdf_img])
245
-
246
- thumbs_up.click(up, chatbot, feedback_status)
247
- thumbs_down.click(lambda: gr.update(visible=True), None, feedback_box)
248
-
249
- clear_btn.click(lambda: ([], None, None), None, [chatbot, link_state, pdf_img])
250
-
251
- # =========================
252
- # Launch
253
- # =========================
254
  if __name__ == "__main__":
 
255
  demo.launch()
 
1
+ # Import necessary libraries for logging, system operations, and file handling.
2
  import logging
3
  import sys
4
  import os
 
 
 
5
 
6
+ # Import core components from the 'agno' library for building the agent.
7
  from agno.agent import Agent
8
  from agno.models.openai import OpenAIChat
9
  from agno.knowledge.embedder.openai import OpenAIEmbedder
 
11
  from agno.knowledge.knowledge import Knowledge
12
  from agno.vectordb.lancedb import LanceDb, SearchType
13
 
14
+ # Import Gradio for creating the web user interface.
15
  import gradio as gr
 
 
 
 
 
16
 
17
+ # Import libraries for handling PDFs and images.
18
+ import fitz # PyMuPDF, used for PDF processing.
19
+ from PIL import Image # Pillow library for image manipulation.
20
+ import io # Used to handle in-memory binary streams.
21
+ import requests # For making HTTP requests to download files.
22
+ import re # Regular expressions for searching text patterns.
23
+
24
+ # Configure basic logging to output messages to the console.
25
  logging.basicConfig(stream=sys.stdout, level=logging.INFO)
26
+ # Get a logger instance for this script.
27
  logger = logging.getLogger(__name__)
28
 
29
+ # Retrieve the OpenAI API key from environment variables.
30
  OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
31
+ # If the API key is not found, raise an error.
32
  if not OPENAI_API_KEY:
33
  raise ValueError("Missing OPENAI_API_KEY")
34
 
35
+ # Initialize the Knowledge Base for the agent.
 
 
36
  knowledge = Knowledge(
37
+ # Use LanceDB as the vector database to store and search document embeddings.
38
  vector_db=LanceDb(
39
+ uri="tmp/lancedb", # Directory to store the database.
40
+ table_name="pdf_documents", # Name of the table within the database.
41
+ search_type=SearchType.vector, # Use vector search for finding relevant documents.
42
+ # Use OpenAI's embedding model to convert text into numerical vectors.
43
  embedder=OpenAIEmbedder(id="text-embedding-3-small"),
44
  )
45
  )
46
 
47
+ # A list of URLs pointing to PDF documents that will be added to the knowledge base.
48
  pdf_urls = [
49
  "https://media.datacamp.com/cms/working-with-hugging-face.pdf",
50
  "https://media.datacamp.com/cms/ai-agents-cheat-sheet.pdf",
 
54
  "https://media.datacamp.com/cms/python-basics-cheat-sheet-v4.pdf"
55
  ]
56
 
57
def download_if_needed(url, filename):
    """Download ``url`` to ``filename`` unless that path already exists.

    Args:
        url: HTTP(S) address of the file to fetch.
        filename: Local path used as the cache location.

    Raises:
        requests.RequestException: If the request times out or the server
            answers with an error status. Nothing is written to ``filename``
            in that case, so a later call retries the download.
    """
    if os.path.exists(filename):
        return

    logger.info(f"Downloading {url}...")
    # A timeout keeps a stalled server from blocking startup indefinitely.
    response = requests.get(url, timeout=30)
    # Fail before writing: otherwise an HTTP error page would be cached on
    # disk under a .pdf name and never re-downloaded.
    response.raise_for_status()
    with open(filename, "wb") as f:
        f.write(response.content)
    logger.info(f"Downloaded {filename} ({len(response.content)} bytes)")
69
 
70
+ # Create a directory named 'pdf_cache' to store downloaded PDF files.
71
+ # 'exist_ok=True' prevents an error if the directory already exists.
72
  os.makedirs("pdf_cache", exist_ok=True)
73
 
74
def add_pdfs_to_knowledge():
    """Cache every URL in ``pdf_urls`` locally and load the files into ``knowledge``.

    Each PDF is stored as ``pdf_cache/file_<index>.pdf``. A PDF whose download
    fails is logged and skipped. The prepared files are then handed to the
    knowledge base through whichever API it exposes: ``add_contents``, then
    ``add_content``, and finally a manual read-and-insert fallback.

    Raises:
        Exception: Any error raised while adding the prepared PDFs is logged
            and re-raised.
    """
    contents_to_add = []

    # Stage 1: make sure each PDF is on disk and remember where it came from.
    for i, url in enumerate(pdf_urls):
        filename = f"pdf_cache/file_{i}.pdf"
        try:
            download_if_needed(url, filename)
            entry = {"path": filename, "metadata": {"source": url}}
            contents_to_add.append(entry)
            logger.info(f"Prepared PDF {i+1}: {url}")
        except Exception as e:
            # One bad download must not stop the remaining PDFs.
            logger.error(f"Failed to prepare PDF {i+1}: {str(e)}")

    # Stage 2: nothing prepared means nothing to index.
    if not contents_to_add:
        logger.warning("No PDFs were prepared to add")
        return

    try:
        batch_api = hasattr(knowledge, 'add_contents')
        if batch_api:
            knowledge.add_contents(contents_to_add)
            logger.info(f"✅ Successfully added {len(contents_to_add)} PDFs using add_contents")
            return

        single_api = hasattr(knowledge, 'add_content')
        if single_api:
            for item in contents_to_add:
                knowledge.add_content(**item)
            logger.info(f"✅ Successfully added {len(contents_to_add)} PDFs using add_content")
            return

        # Neither method exists: read the PDFs and insert the documents directly.
        from agno.document.reader.pdf_reader import PDFReader
        reader = PDFReader()
        all_docs = []
        for item in contents_to_add:
            for doc in reader.read(item["path"]):
                doc.metadata = item["metadata"]
                all_docs.append(doc)
        knowledge.vector_db.insert(documents=all_docs)
        logger.info(f"✅ Successfully added {len(all_docs)} document chunks from {len(contents_to_add)} PDFs")
    except Exception as e:
        logger.error(f"Failed to add PDFs: {str(e)}")
        raise
129
 
130
+ # Call the function to load the PDFs into the knowledge base.
131
+ add_pdfs_to_knowledge()
132
 
133
+ # Initialize the AI agent with its configuration.
 
 
134
  agent = Agent(
135
+ # Set the underlying language model to OpenAI's GPT-4.1-mini with low temperature for more predictable responses.
136
  model=OpenAIChat(id="gpt-4.1-mini", temperature=0.2),
137
+ # Give the agent a name/description.
138
  description="You are Dox a data expert!",
139
+ # Provide detailed instructions (the "system prompt") that govern the agent's behavior.
140
+ instructions="""
141
+ You are a data professional's assistant named Dox.
142
  Your primary goal is to answer questions about data, programming, cloud computing, AI/ML, and technology topics.
143
+ Here are your operating procedures:
144
+ 1. **Information Gathering Strategy**:
145
+ * **Prioritize Knowledge Base**: First, search your internal knowledge base for the answer.
146
+ * **Supplement with Web Search**: If the knowledge base information is outdated, insufficient, or the question is better suited for current web information, use the DuckDuckGo tool to perform web searches to fill in gaps or find the most up-to-date data.
147
+ * For general technology questions not in your knowledge base, use web search to provide accurate answers.
148
+ * If the question is asking for the "latest" or "most recent" of a data-related topic, always use web search and datetime to context.
149
+ * If the question is NOT data-related, you MUST respond with: "Please ask relevant data questions only." and terminate.
150
+ 2. **Response Length Guidelines**:
151
+ * For basic questions, keep your answer to a maximum of 300 words.
152
+ * For complex questions, extend your answer to a maximum of 500 words.
153
+ 3. **Citation Rules (CRITICAL)**:
154
+ * **Knowledge Base Citation**: For any information sourced from your internal knowledge base, you MUST include a citation on a NEW LINE after the answer, starting with "Source: ", followed by the metadata field 'source' to get the hyperlink.
155
+ * **Web Search Citation**: For any information obtained from the web using the DuckDuckGo tool, you MUST include a citation on a NEW LINE after the answer, starting with "Online Source: ", followed by the full hyperlink.
156
+ * **Final Rule for Citations**: Always end your answers with the appropriate citations, ensuring they are on separate lines as specified. Do NOT mix or combine citation types on a single line.
157
+ * ALWAYS cite with links NOT text like "from internal knowledge base"
158
+ 4. **Accuracy and Non-Hallucination**:
159
+ * Provide factual and relevant answers based ONLY on the information found in your knowledge base or through web searches.
160
+ * NEVER invent or hallucinate information. If an answer cannot be found, state that directly.
161
+ Make sure to follow these instructions precisely.
162
+ """,
163
+ # Link the agent to the knowledge base created earlier.
164
  knowledge=knowledge,
165
+ # Automatically add the current date and time to the agent's context.
166
  add_datetime_to_context=True,
167
+ # Automatically add the user's location to the context (if available).
168
  add_location_to_context=True,
169
+ # Enable the agent to search its knowledge base by default.
170
  search_knowledge=True,
171
+ # Equip the agent with tools, in this case, the ability to search the web using DuckDuckGo.
172
  tools=[DuckDuckGoTools()],
173
+ # Enable markdown formatting in the agent's output.
174
  markdown=True
175
  )
176
 
177
+ # Log a success message indicating the agent is ready.
178
+ logger.info("🟢 Agent initialized successfully!")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179
 
180
+ # Defines a function to process a user's question.
181
def ask_agent(question):
    """Send ``question`` to the agent and return its answer plus a PDF link.

    Args:
        question: The user's question text.

    Returns:
        A ``(text, link)`` tuple. ``text`` is the agent's answer with a
        feedback footer appended, or a generic error message if the agent
        call raised. ``link`` is the first ``.pdf`` URL found in the answer,
        or ``None`` when there is none or the call failed.
    """
    logger.info(f"Question asked: {question[:100]}...")
    try:
        response = agent.run(question, use_knowledge=True)
        full_content = response.get_content_as_string()
    except Exception as e:
        # Keep the traceback in the log; the user only sees a generic message.
        logger.error(str(e), exc_info=True)
        return "❌ Something went wrong. Please try again.", None

    # The agent answers in Markdown, so a URL is often wrapped as
    # "[url](url)" or "<url>". Excluding brackets and parentheses stops the
    # match at the end of the URL instead of running on through "](" into the
    # next one.
    pdf_url_pattern = r"https?://[^\s()\[\]<>]+\.pdf"
    match = re.search(pdf_url_pattern, full_content, re.IGNORECASE)
    link = match.group(0) if match else None

    if link:
        logger.info(f"PDF link found: {link}")
    else:
        logger.info("🔴 No PDF link found in response")

    full_content += "\n\n---\n**📋 Dox would appreciate your feedback! ⬇️**"
    return full_content, link
204
+
205
+ # Defines a function to download the raw content of a PDF from a URL.
206
def download_pdf_from_url(url):
    """Fetch ``url`` over HTTP and return the response body as bytes.

    The request uses a 30-second timeout, and ``raise_for_status()`` is called
    before the body is returned.
    """
    reply = requests.get(url, timeout=30)
    reply.raise_for_status()
    pdf_bytes = reply.content
    return pdf_bytes
213
+
214
+ # A Gradio helper function to update the UI while a PDF is being prepared for display.
215
def prepare_pdf_loading(link):
    """Build the status update shown before the PDF preview is rendered.

    The status component is made visible in both cases: it carries a
    "loading" message when ``link`` is truthy and a "no PDF" message otherwise.
    """
    message = "📄 Loading PDF preview..." if link else "❌ No PDF for preview"
    return gr.update(value=message, visible=True)
221
+
222
+ # Defines a function to display the first page of a PDF as an image.
223
def display_pdf(pdf_url):
    """Render the first page of the PDF at ``pdf_url`` as a preview image.

    Args:
        pdf_url: Address of the PDF to preview; falsy means "nothing to show".

    Returns:
        A pair of Gradio updates ``(image_update, status_update)``:
        both hidden when there is no URL, the image shown and the status
        hidden on success, and the image hidden with a failure message shown
        when downloading or rendering raises.
    """
    if not pdf_url:
        return (
            gr.update(value=None, visible=False),
            gr.update(value="", visible=False)
        )

    try:
        # NOTE(review): the URL appears to come from text the agent produced,
        # so it may point at any host - consider an allow-list; confirm.
        pdf_bytes = download_pdf_from_url(pdf_url)
        doc = fitz.open(stream=pdf_bytes, filetype="pdf")
        try:
            page = doc[0]
            # Render at 1.5x so the preview is sharper than the default scale.
            zoom = 1.5
            mat = fitz.Matrix(zoom, zoom)
            pix = page.get_pixmap(matrix=mat)
            png_bytes = pix.tobytes("png")
        finally:
            # Close the document even when page access or rendering fails.
            doc.close()

        img = Image.open(io.BytesIO(png_bytes))
        return (
            gr.update(value=img, visible=True),
            gr.update(value="", visible=False)
        )
    except Exception as e:
        logger.error(f"PDF error: {e}")
        return (
            gr.update(value=None, visible=False),
            gr.update(value="❌ Failed to load PDF", visible=True)
        )
258
+
259
+ theme = gr.themes.Ocean(
260
+ font=[gr.themes.GoogleFont("Inter"), "Segoe UI", "sans-serif"],
261
+ font_mono=[gr.themes.GoogleFont("Fira Code"), "monospace"]
262
+ )
263
 
264
+ demo_css = """
265
+ .chatbot {
266
+ font-family: 'Inter', 'Segoe UI', sans-serif !important;
267
+ font-size: 12px !important;
268
+ }
269
+
270
+ .chatbot .message code,
271
+ .chatbot .message pre {
272
+ font-size: 12px !important;
273
+ font-family: 'Inter', 'Segoe UI', sans-serif !important;
274
+ }
275
+
276
+ .component {
277
+ font-family: 'Inter', 'Segoe UI', sans-serif !important;
278
+ font-size: 12px !important;
279
+ }
280
+
281
+ .gradio-container .examples {
282
+ font-family: 'Inter', 'Segoe UI', sans-serif !important;
283
+ font-size: 12px !important;
284
+ }
285
+ """
286
+
287
+ # Create the Gradio interface using `gr.Blocks` for a custom layout.
288
+ with gr.Blocks(
289
+ title="# 🤖 Dox the Data Professional's Advisor 🤖",
290
+ theme=theme,
291
+ css=demo_css,
292
+ fill_width=True
293
+ ) as demo:
294
+ # Add titles and descriptions using Markdown.
295
  gr.Markdown("# 🤖 Dox the Data Professional's Advisor 🤖")
296
+ gr.Markdown("### 🧠 Dox is an expert in the following topics: \n1️⃣ Hugging Face | 2️⃣ AI Agents | 3️⃣ SQL with AI | 4️⃣ Azure CLI | 5️⃣ Power BI | 6️⃣ Python")
297
def run_example(question_text, chat_history=None):
    """Run an example question through ``chat_ui`` for ``gr.Examples``.

    The examples component is configured with ``inputs=question`` and
    ``outputs=[chatbot, question]``: one input and two outputs. ``chat_ui``
    takes a history argument and yields four values, so this adapter defaults
    the history and forwards only the chat history and the textbox value.

    Args:
        question_text: The example question to ask.
        chat_history: Existing conversation, or ``None`` to start a new one.

    Yields:
        ``(chat_history, textbox_value)`` for each step ``chat_ui`` produces.
    """
    for history, _link, _preview, textbox_value in chat_ui(question_text, chat_history):
        yield history, textbox_value
299
+ # Create a main row for the layout.
 
 
 
 
 
 
300
  with gr.Row():
301
+ # LEFT-SIDE COLUMN: for the chat interface.
302
+ with gr.Column(scale=3):
303
+ # The chatbot display window.
304
+ chatbot = gr.Chatbot(label="💬 Conversation", elem_classes="chatbot", height=450)
305
+ # A text area for status messages (used for PDF loading status).
306
+ status_text = gr.Markdown("")
307
+ # The textbox where the user types their question.
308
+ question = gr.Textbox(
309
+ label="🙋 Ask Dox a question:",
310
+ placeholder="🤔 Type your question here...",
311
+ lines=1,
312
+ elem_classes="component"
313
+ )
314
+ # The submit button.
315
+ #ask_btn = gr.Button("Submit 📤", variant="primary")
316
+ with gr.Row():
317
+ ask_btn = gr.Button("Submit 📤", variant="primary", elem_classes="component")
318
+ clear_btn = gr.Button("🧹 Clear Chat", elem_classes="component")
319
+ # A section for example questions.
320
+ gr.Markdown("### 💡 Example Questions", elem_classes="component")
321
+ examples = gr.Examples(
322
+ examples=[
323
+ "How do you log into Azure using device code authentication?",
324
+ "What are the three main components of an AI agent?",
325
+ "What are the \"core four\" Hugging Face libraries?",
326
+ "What SQL clause is used to filter data after grouping?",
327
+ "What is the latest GPT model?"
328
+ ],
329
+ inputs=question,
330
+ outputs=[chatbot, question],
331
+ fn=run_example,
332
+ cache_examples=False
333
+ )
334
+ # 👍👎 Feedback buttons
335
+ with gr.Row():
336
+ thumbs_up = gr.Button("👍 Helpful", elem_classes="component")
337
+ thumbs_down = gr.Button("👎 Not Helpful", elem_classes="component")
338
+
339
+ # Hidden feedback box (only appears on 👎)
340
+ feedback_box = gr.Textbox(
341
+ placeholder="💬 Optional: tell us what went wrong...",
342
+ visible=False
343
+ )
344
+
345
+ submit_feedback_btn = gr.Button("📝 Submit Feedback", visible=False, elem_classes="component")
346
+ feedback_status = gr.Markdown("", elem_classes="component")
347
+ # RIGHT-SIDE COLUMN: for the PDF preview.
348
+ with gr.Column(scale=3):
349
+ gr.Markdown("### 📄 Referenced PDF Document (🌐 Empty for Web Results)", elem_classes="component")
350
+ #gr.Markdown(" 🌐 Empty by default", elem_classes="component")
351
+ # A hidden state to store the PDF link found in the agent's response.
352
+ link_state = gr.State()
353
+ # A markdown component to show PDF loading status.
354
+ pdf_status = gr.Markdown(visible=False, elem_classes="component")
355
+ # An image component to display the PDF preview.
356
+ output_image = gr.Image(
357
+ label="⬇️ Cheat Sheet Preview",
358
+ visible=False
359
+ )
360
+ pdf_link_btn = gr.Markdown("")
361
+
362
+ # Defines the main chat logic as a generator function for streaming output.
363
def chat_ui(user_message, chat_history):
    """Generator that drives one chat turn in two UI updates.

    Each yielded tuple is ``(chat_history, link, image_update, textbox_value)``.
    The first yield shows the user's message with a placeholder reply; the
    second replaces the placeholder with the answer from ``ask_agent`` and
    carries the PDF link it extracted. Both yields hide the image preview and
    empty the textbox. ``chat_history`` is extended in place.
    """
    if chat_history is None:
        chat_history = []

    user_turn = {"role": "user", "content": user_message}
    placeholder_turn = {"role": "assistant", "content": "🤔 Dox is thinking..."}
    chat_history.extend([user_turn, placeholder_turn])

    # First update: echo the question right away, before the agent is called.
    hidden_preview = gr.update(value=None, visible=False)
    yield chat_history, None, hidden_preview, ""

    response_text, link = ask_agent(user_message)

    # Second update: swap the placeholder for the real answer.
    chat_history[-1] = {"role": "assistant", "content": response_text}
    hidden_preview = gr.update(value=None, visible=False)
    yield chat_history, link, hidden_preview, ""
405
+
406
+ # This is a helper function to avoid repeating the event handler chain.
407
def submit_chain():
    """Return the ``(fn, inputs, outputs)`` triple shared by the submit events.

    The handler is ``chat_ui``; it reads the question textbox and the chatbot
    history, and writes the chatbot, the link state, the preview image and the
    question textbox.
    """
    handler_inputs = [question, chatbot]
    handler_outputs = [chatbot, link_state, output_image, question]
    return chat_ui, handler_inputs, handler_outputs
416
+
417
def show_pdf_link(link):
    """Return a Markdown "open PDF" link for ``link``, or "" when it is falsy."""
    return f"[📥 Open Full PDF]({link})" if link else ""
421
+
422
def clear_chat():
    """Return reset values for the five outputs wired to the clear button.

    In order: an empty chat history, no stored link, and three updates that
    each clear a component's value and hide it.
    """
    def hidden_and_empty():
        return gr.update(value=None, visible=False)

    return (
        [],
        None,
        hidden_and_empty(),
        hidden_and_empty(),
        hidden_and_empty(),
    )
424
+
425
+ clear_btn.click(
426
+ clear_chat,
427
+ outputs=[chatbot, link_state, output_image, feedback_box, submit_feedback_btn]
428
+ )
429
 
430
def show_feedback_box():
    """Return two updates that make their target components visible."""
    reveal_first = gr.update(visible=True)
    reveal_second = gr.update(visible=True)
    return reveal_first, reveal_second
432
 
433
def show_appreciation():
    """Log a positive rating and return the confirmation text to display."""
    confirmation = "✅ Feedback submitted. Thank you!"
    logger.info("It was helpful!")
    return confirmation
436
+
437
+ thumbs_down.click(
438
+ show_feedback_box,
439
+ outputs=[feedback_box, submit_feedback_btn]
440
+ )
441
 
442
+ thumbs_up.click(
443
+ show_appreciation,
444
+ outputs=feedback_status
445
+ )
 
 
446
 
447
def handle_feedback(text):
    """Log the user's written feedback and return the confirmation text."""
    confirmation = "✅ Feedback submitted. Thank you!"
    logger.info(f"User feedback: {text}")
    return confirmation
450
+
451
+ submit_feedback_btn.click(
452
+ handle_feedback,
453
+ inputs=feedback_box,
454
+ outputs=feedback_status
455
+ )
456
 
457
+ examples.dataset.click(
458
+ *submit_chain()
459
+ ).then(
460
+ prepare_pdf_loading,
461
+ inputs=link_state,
462
+ outputs=pdf_status
463
+ ).then(
464
+ display_pdf,
465
+ inputs=link_state,
466
+ outputs=[output_image, pdf_status]
467
+ ).then(
468
+ show_pdf_link,
469
+ inputs=link_state,
470
+ outputs=pdf_link_btn
471
+ )
472
 
473
+ # Set up the event handler for the "Submit" button click.
474
+ ask_btn.click(
475
+ *submit_chain()
476
+ # `.then()` chains subsequent actions after the first one completes.
477
+ ).then(
478
+ # After chat_ui, call `prepare_pdf_loading` to show the "loading" message.
479
+ prepare_pdf_loading,
480
+ inputs=link_state, # Use the link from chat_ui's output.
481
+ outputs=pdf_status # Update the pdf_status text.
482
+ ).then(
483
+ # Finally, call `display_pdf` to render the PDF page.
484
+ display_pdf,
485
+ inputs=link_state, # Use the same link.
486
+ outputs=[output_image, pdf_status] # Update the image and hide the status text.
487
+ ).then(
488
+ show_pdf_link,
489
+ inputs=link_state,
490
+ outputs=pdf_link_btn
491
+ )
492
+
493
+ # Set up the same event handler for when the user presses Enter in the textbox.
494
+ question.submit(
495
+ *submit_chain()
496
+ ).then(
497
+ prepare_pdf_loading,
498
+ inputs=link_state,
499
+ outputs=pdf_status
500
+ ).then(
501
+ display_pdf,
502
+ inputs=link_state,
503
+ outputs=[output_image, pdf_status]
504
+ ).then(
505
+ show_pdf_link,
506
+ inputs=link_state,
507
+ outputs=pdf_link_btn
508
+ )
509
 
510
+ # This block ensures the code inside only runs when the script is executed directly.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
511
  if __name__ == "__main__":
512
+ # Launch the Gradio web server.
513
  demo.launch()