# Import necessary libraries for logging, system operations, and file handling.
import logging
import sys
import os
# Import core components from the 'agno' library for building the agent.
from agno.agent import Agent
from agno.models.openai import OpenAIChat
from agno.knowledge.embedder.openai import OpenAIEmbedder
from agno.tools.duckduckgo import DuckDuckGoTools
from agno.knowledge.knowledge import Knowledge
from agno.vectordb.lancedb import LanceDb, SearchType
# Import Gradio for creating the web user interface.
import gradio as gr
# Import libraries for handling PDFs and images.
import fitz # PyMuPDF, used for PDF processing.
from PIL import Image # Pillow library for image manipulation.
import io # Used to handle in-memory binary streams.
import requests # For making HTTP requests to download files.
import re # Regular expressions for searching text patterns.
# --- Logging ---
# Route INFO-and-above messages to stdout so they appear in the host console.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logger = logging.getLogger(__name__)

# --- Configuration ---
# The OpenAI API key is mandatory; fail fast at import time when it is absent
# rather than erroring later on the first model call.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    raise ValueError("Missing OPENAI_API_KEY")
# --- Knowledge base ---
# LanceDB-backed vector store; OpenAI embeddings convert document chunks into
# vectors so the agent can retrieve them by similarity.
knowledge = Knowledge(
    vector_db=LanceDb(
        uri="tmp/lancedb",              # on-disk location of the database
        table_name="pdf_documents",     # table holding the PDF chunks
        search_type=SearchType.vector,  # pure vector-similarity search
        # Embedding model used for both indexing and querying.
        embedder=OpenAIEmbedder(id="text-embedding-3-small"),
    )
)

# DataCamp cheat-sheet PDFs ingested into the knowledge base at startup.
pdf_urls = [
    "https://media.datacamp.com/cms/working-with-hugging-face.pdf",
    "https://media.datacamp.com/cms/ai-agents-cheat-sheet.pdf",
    "https://media.datacamp.com/cms/introduction-to-sql-with-ai-1.pdf",
    "https://media.datacamp.com/legacy/image/upload/v1719844709/Marketing/Blog/Azure_CLI_Cheat_Sheet.pdf",
    "https://s3.amazonaws.com/assets.datacamp.com/email/other/Power+BI_Cheat+Sheet.pdf",
    "https://media.datacamp.com/cms/python-basics-cheat-sheet-v4.pdf",
]
# -----------------------------
# Example Questions
# -----------------------------
# Canned prompts surfaced in the UI so visitors can try the bot in one click.
example_questions = [
    "How do you log into Azure using device code authentication?",
    "What are the three main components of an AI agent?",
    'What are the "core four" Hugging Face libraries?',
    "What SQL clause is used to filter data after grouping?",
    "What is the latest GPT model?",
]

# -----------------------------
# Chat Functions
# -----------------------------
# Seed conversation shown when the chat window first loads.
initial_chat = [
    {
        "role": "assistant",
        "content": "Hello, I am Dox, the Data Professional's Guide. Ask me a question about data.",
    }
]
def download_if_needed(url, filename):
    """Download *url* to *filename* unless the file already exists.

    Acts as a simple on-disk cache: repeated calls for an already-cached file
    are no-ops. Raises ``requests.HTTPError`` for non-2xx responses and
    ``requests.Timeout`` on a stalled server, so a failed download never
    leaves a bogus cached file behind.
    """
    if os.path.exists(filename):
        return
    logger.info(f"Downloading {url}...")
    # Bound the request so a hung server cannot stall application startup.
    response = requests.get(url, timeout=60)
    # BUG FIX: fail loudly on 4xx/5xx instead of silently caching an HTML
    # error page as a "PDF".
    response.raise_for_status()
    with open(filename, "wb") as f:
        f.write(response.content)
    # BUG FIX: previously logged the literal text "(unknown)" instead of the
    # destination file name.
    logger.info(f"Downloaded {filename} ({len(response.content)} bytes)")

# Local cache directory for the downloaded PDFs.
# 'exist_ok=True' makes this idempotent across restarts.
os.makedirs("pdf_cache", exist_ok=True)
def add_pdfs_to_knowledge():
    """Download the configured PDFs and load them into the knowledge base.

    Supports several agno versions by probing the API surface: the batch
    ``add_contents`` method first, then the single-item ``add_content``
    method, and finally a manual fallback that reads the PDFs with
    ``PDFReader`` and inserts the chunks straight into the vector store.

    Raises whatever exception the chosen ingestion path raises; individual
    download failures are logged and skipped.
    """
    # Collect (path, metadata) descriptors for every PDF we could fetch.
    contents_to_add = []
    for i, url in enumerate(pdf_urls):
        filename = f"pdf_cache/file_{i}.pdf"
        try:
            download_if_needed(url, filename)
            # Record the source URL as metadata so the agent can cite it.
            contents_to_add.append({
                "path": filename,
                "metadata": {"source": url},
            })
            logger.info(f"Prepared PDF {i+1}: {url}")
        except Exception as e:
            # One bad URL must not abort ingestion of the others.
            logger.error(f"Failed to prepare PDF {i+1}: {str(e)}")

    if not contents_to_add:
        logger.warning("No PDFs were prepared to add")
        return

    try:
        if hasattr(knowledge, 'add_contents'):
            # Newer agno: single batch insert.
            knowledge.add_contents(contents_to_add)
            logger.info(f"Successfully added {len(contents_to_add)} PDFs using add_contents")
        elif hasattr(knowledge, 'add_content'):
            # Older agno: one call per document.
            for item in contents_to_add:
                knowledge.add_content(**item)
            logger.info(f"Successfully added {len(contents_to_add)} PDFs using add_content")
        else:
            # Oldest agno: read the PDFs ourselves and insert raw chunks.
            from agno.document.reader.pdf_reader import PDFReader
            reader = PDFReader()
            all_docs = []
            for item in contents_to_add:
                docs = reader.read(item["path"])
                for doc in docs:
                    # BUG FIX: copy the metadata so chunks do not all share
                    # one mutable dict object.
                    doc.metadata = dict(item["metadata"])
                    all_docs.append(doc)
            knowledge.vector_db.insert(documents=all_docs)
            logger.info(
                f"Successfully added {len(all_docs)} document chunks "
                f"from {len(contents_to_add)} PDFs"
            )
    except Exception as e:
        # Ingestion failure is fatal for the app; log it and propagate.
        logger.error(f"Failed to add PDFs: {str(e)}")
        raise

# Populate the knowledge base once at startup.
add_pdfs_to_knowledge()
# Initialize the AI agent with its configuration.
agent = Agent(
    # Low temperature (0.2) keeps answers predictable and factual.
    model=OpenAIChat(id="gpt-4.1-mini", temperature=0.2),
    # Short persona description given to the model.
    description="You are Dox a data expert!",
    # System prompt governing retrieval strategy, response length,
    # citation format, and the non-hallucination policy.
    instructions="""
You are a data professional's assistant named Dox.
Your primary goal is to answer questions about data, programming, cloud computing, AI/ML, and technology topics.
Here are your operating procedures:
1. **Information Gathering Strategy**:
* **Prioritize Knowledge Base**: First, search your internal knowledge base for the answer.
* **Supplement with Web Search**: If the knowledge base information is outdated, insufficient, or the question is better suited for current web information, use the DuckDuckGo tool to perform web searches to fill in gaps or find the most up-to-date data.
* For general technology questions not in your knowledge base, use the DuckDuckGo tool to perform web search to provide accurate answers.
* If the question is asking for the "latest" or "most recent" of a data-related topic, ALWAYS use the DuckDuckGo tool to perform the latest web search and datetime to context.
* If the question is NOT data-related, you MUST respond with: "Please ask relevant data questions only." and terminate.
2. **Response Length Guidelines**:
* For basic questions, keep your answer to a maximum of 300 words.
* For complex questions, extend your answer to a maximum of 500 words.
3. **Citation Rules (CRITICAL)**:
* **Knowledge Base Citation**: For any information sourced from your internal knowledge base, you MUST ALWAYS include a citation on a NEW LINE after the answer, starting with "Source: ", followed by the metadata field 'source' to get the hyperlink.
* **Web Search Citation**: For any information obtained from the web using the DuckDuckGo tool, you MUST ALWAYS include a citation on a NEW LINE after the answer, starting with "Online Source: ", followed by the full hyperlink.
* **Final Rule for Citations**: ALWAYS end your answers with the appropriate citations, ensuring they are on separate lines as specified. Do NOT mix or combine citation types on a single line.
* ALWAYS cite with links NOT text like "from internal knowledge base"
4. **Accuracy and Non-Hallucination**:
* Provide factual and relevant answers based ONLY on the information found in your knowledge base or through the DuckDuckGo tool to perform web searches.
* NEVER invent or hallucinate information. If an answer cannot be found, state that directly.
Make sure to follow these instructions precisely.
""",
    # Knowledge base built above (LanceDB + OpenAI embeddings).
    knowledge=knowledge,
    # Inject the current date/time into the model context (used by the
    # "latest/most recent" instruction above).
    add_datetime_to_context=True,
    # Inject the user's location into the context when available.
    add_location_to_context=True,
    # Left disabled by the author; knowledge search is requested explicitly
    # per-call via agent.run(..., use_knowledge=True) below.
    #search_knowledge=True,
    # Web-search capability used to supplement the knowledge base.
    tools=[DuckDuckGoTools()],
    # Render the agent's answers as Markdown in the UI.
    markdown=True
)
# Log a success message indicating the agent is ready.
logger.info("π’ Agent initialized successfully!")
def ask_agent(question):
    """Run the agent on *question* and return (answer_text, pdf_link_or_None).

    The link is the first ``.pdf`` URL found in the answer, used by the UI
    to render a cheat-sheet preview. On any agent failure a friendly error
    string is returned instead of raising.
    """
    logger.info(f"Question asked: {question[:100]}...")
    try:
        # Explicitly ask the agent to consult its knowledge base.
        answer = agent.run(question, use_knowledge=True).get_content_as_string()
    except Exception as e:
        logger.error(str(e))
        return "β Something went wrong. Please try again.", None
    # Extract the first PDF hyperlink, if any, for the preview pane.
    pdf_match = re.search(r'https?://[^\s]+\.pdf', answer, re.IGNORECASE)
    link = pdf_match.group(0) if pdf_match else None
    if link is None:
        logger.info("π΄ No PDF link found in response")
    else:
        logger.info(f"PDF link found: {link}")
    # Append a footer nudging users toward the feedback buttons.
    answer += "\n\n---\n**π Dox would appreciate your feedback! β¬οΈ**"
    return answer, link
def download_pdf_from_url(url):
    """Return the raw bytes of the document at *url*.

    Raises ``requests.HTTPError`` for non-2xx responses and
    ``requests.Timeout`` if the server takes longer than 30 seconds.
    """
    resp = requests.get(url, timeout=30)
    resp.raise_for_status()
    return resp.content
def prepare_pdf_loading(link):
    """Update the status markdown while a PDF preview is being fetched.

    Shows a loading message when a link exists, otherwise a 'no PDF' notice.
    """
    message = "π Loading PDF preview..." if link else "β No PDF for preview"
    return gr.update(value=message, visible=True)
def display_pdf(pdf_url):
    """Render page 1 of the PDF at *pdf_url* as an image for the preview pane.

    Returns a pair of ``gr.update`` objects for (image component, status
    markdown). With no URL both components are hidden; on failure the image
    is hidden and an error message is shown.
    """
    if not pdf_url:
        # Nothing to preview.
        return (
            gr.update(value=None, visible=False),
            gr.update(value="", visible=False),
        )
    try:
        raw = download_pdf_from_url(pdf_url)
        # Open the PDF directly from memory — no temp file needed.
        document = fitz.open(stream=raw, filetype="pdf")
        first_page = document[0]
        # Render at 1.5x zoom for a crisper preview.
        pixmap = first_page.get_pixmap(matrix=fitz.Matrix(1.5, 1.5))
        preview = Image.open(io.BytesIO(pixmap.tobytes("png")))
        document.close()
        return (
            gr.update(value=preview, visible=True),
            gr.update(value="", visible=False),
        )
    except Exception as e:
        logger.error(f"PDF error: {e}")
        return (
            gr.update(value=None, visible=False),
            gr.update(value="β Failed to load PDF", visible=True),
        )
# Shared Gradio theme: Ocean palette with Inter for text and Fira Code for code.
theme = gr.themes.Ocean(
    font=[gr.themes.GoogleFont("Inter"), "Segoe UI", "sans-serif"],
    font_mono=[gr.themes.GoogleFont("Fira Code"), "monospace"],
)

# Logo displayed in the header card.
# NOTE(review): the URL contains unencoded spaces; browsers generally encode
# them on the fly, but %20 would be safer — confirm the asset still resolves.
DOX_LOGO = "https://raw.githubusercontent.com/AzzamAlnatsheh/DDS_BuildingAIChallenge/main/Dox Transparent Image.png"
# Custom stylesheet applied to the Gradio app (dark blue "Dox" look).
custom_css = """
.main-container {
width: 100%;
max-width: 100%;
margin: 0;
padding: 0 10px;
}
/* HEADER */
.header-card {
background: linear-gradient(135deg, #0B1F3A, #102A4C);
border-radius: 24px;
padding: 0px;
box-shadow: 0 16px 40px rgba(2, 6, 23, 0.4);
border: 1px solid #1F3B5C;
margin: 0px;
}
/* SIDEBAR */
.sidebar-card {
background: rgba(11, 31, 58, 0.95);
border-radius: 24px;
padding: 24px;
box-shadow: 0 16px 40px rgba(2, 6, 23, 0.35);
border: 1px solid #1F3B5C;
height: 100%;
}
/* CHAT */
.chat-card {
background: rgba(16, 42, 76, 0.95);
border-radius: 24px;
padding: 22px;
box-shadow: 0 16px 40px rgba(2, 6, 23, 0.35);
border: 1px solid #1F3B5C;
}
/* LOGO */
.logo-img {
width: 200px;
height: 170px;
margin: 0px;
padding: 0px;
}
/* TITLE */
.title-text {
font-size: 32px;
font-weight: 850;
color: #E6F0FA;
margin-bottom: 8px;
letter-spacing: -0.03em;
}
/* SUBTITLE */
.subtitle-text {
font-size: 16px;
color: #AFCBE8;
line-height: 1.65;
max-width: auto;
}
/* BADGES */
.badge {
display: inline-block;
background: rgba(30, 167, 255, 0.15);
color: #4FD1FF;
padding: 7px 13px;
border-radius: 999px;
font-size: 13px;
font-weight: 650;
margin-right: 7px;
margin-bottom: 8px;
border: 1px solid rgba(30, 167, 255, 0.3);
}
/* STATUS BOX */
.status-box {
background: rgba(16, 42, 76, 0.85);
border: 1px solid #1F3B5C;
padding: 14px;
border-radius: 16px;
font-size: 14px;
color: #CFE6FF;
line-height: 1.6;
}
/* NOTES */
.small-note {
font-size: 13px;
color: #9FB9D9;
line-height: 1.55;
}
.footer-note {
font-size: 13px;
color: #9FB9D9;
text-align: center;
margin-top: 18px;
}
/* CHATBOT BOX */
#chatbot {
min-height: 540px;
border-radius: 18px;
border: 1px solid #1F3B5C;
background: rgba(11, 31, 58, 0.9);
}
/* INPUT
   BUG FIX: the textbox is created with elem_classes="question_box", so the
   id selector #question_box alone never matched; match the class too. */
#question_box textarea, .question_box textarea {
border-radius: 16px !important;
background: #0B1F3A;
color: #E6F0FA;
border: 1px solid #1F3B5C;
}
/* EXAMPLES */
.example-button {
margin-bottom: 8px !important;
border-radius: 14px !important;
white-space: normal !important;
text-align: left !important;
background: rgba(30, 167, 255, 0.08);
color: #CFE6FF;
border: 1px solid rgba(30, 167, 255, 0.2);
}
/* PRIMARY BUTTON */
.primary-action {
border-radius: 14px !important;
background: linear-gradient(135deg, #1EA7FF, #4FD1FF) !important;
color: #0B1F3A !important;
border: none !important;
font-weight: 700;
}
/* CLEAR BUTTON */
.clear-action {
border-radius: 14px !important;
background: rgba(255, 255, 255, 0.05) !important;
color: #E6F0FA !important;
border: 1px solid #1F3B5C !important;
}
"""
# Create the Gradio interface using `gr.Blocks` for a custom layout.
# FIX: this region's indentation and two string literals were corrupted in the
# source; structure is reconstructed here with identical logic.
with gr.Blocks(title="π€ Dox the Data Professional's Guide π€",
               #theme=theme,
               #css=demo_css,
               fill_width=True
               ) as demo:

    def run_example(question_text, chat_history):
        # Delegate to the shared chat handler (defined below) so example
        # clicks behave exactly like typed questions.
        # NOTE(review): gr.Examples below declares outputs=[chatbot, question]
        # but chat_ui yields 4-tuples; with cache_examples=False this fn may
        # never run on click — confirm against the installed Gradio version.
        return chat_ui(question_text, chat_history)

    with gr.Column(elem_classes=["main-container"]):
        # -----------------------------
        # Header
        # -----------------------------
        with gr.Row(elem_classes=["header-card"]):
            with gr.Column(scale=1):
                gr.HTML(
                    f"""
<img src="{DOX_LOGO}" class="logo-img" alt="DOX Logo">
"""
                )
            with gr.Column(scale=5):
                gr.HTML(
                    """
<div class="title-text">π€ Dox the Data Professional's Advisor π€</div>
<div class="subtitle-text">
A professional data chatbot that aims in reminding and helping data experts in certain concepts in a simplified way
while also having access to download DataCamp's public cheatsheets on many data-related topics.
</div>
<br>
<span class="badge">Agno</span>
<span class="badge">LanceDB</span>
<span class="badge">OpenAI</span>
<span class="badge">Gradio</span>
"""
                )
        # -----------------------------
        # Two Column Layout
        # -----------------------------
        with gr.Row():
            # Left column: the chat area.
            with gr.Column(scale=3, elem_classes=["chat-card"]):
                gr.Markdown("### π§ Dox is an expert in the following topics: \n1οΈβ£ Hugging Face | 2οΈβ£ AI Agents | 3οΈβ£ SQL with AI | 4οΈβ£ Azure CLI | 5οΈβ£ Power BI | 6οΈβ£ Python")
                chatbot = gr.Chatbot(
                    label="π¬ Conversation",
                    elem_id="chatbot",
                    value=initial_chat.copy(),
                    height=450
                )
                question = gr.Textbox(
                    label="π Ask Dox a question:",
                    placeholder="π€ Type your question here...",
                    lines=1,
                    elem_classes="question_box"
                )
                with gr.Row():
                    ask_btn = gr.Button("Submit π€", variant="primary", elem_classes="primary-action")
                    clear_btn = gr.Button("π§Ή Clear Chat", elem_classes="clear-action")
                # One-click example questions.
                gr.Markdown("### π‘ Example Questions", elem_classes="example-button")
                examples = gr.Examples(
                    examples=example_questions,
                    inputs=question,
                    outputs=[chatbot, question],
                    fn=run_example,
                    cache_examples=False
                )
                # Thumbs up/down feedback controls.
                with gr.Row():
                    thumbs_up = gr.Button("π Helpful", elem_classes="clear-action")
                    thumbs_down = gr.Button("π Not Helpful", elem_classes="clear-action")
                # Free-text feedback, revealed only after a thumbs-down.
                feedback_box = gr.Textbox(
                    placeholder="π¬ Optional: tell us what went wrong...",
                    visible=False
                )
                submit_feedback_btn = gr.Button("π Submit Feedback", visible=False, elem_classes="clear-action")
                feedback_status = gr.Markdown("", elem_classes="clear-action")
            # Right column: the PDF preview.
            with gr.Column(scale=3):
                gr.Markdown("### π Referenced PDF Document (π Empty for Web Results)", elem_classes="clear-action")
                # Hidden state carrying the PDF link extracted from the answer.
                link_state = gr.State()
                # Status line shown while the preview loads (or on failure).
                pdf_status = gr.Markdown(visible=False, elem_classes="clear-action")
                # First-page preview of the referenced cheat sheet.
                output_image = gr.Image(
                    label="β¬οΈ Cheat Sheet Preview",
                    visible=False
                )
                pdf_link_btn = gr.Markdown("")

    def chat_ui(user_message, chat_history):
        """Streaming chat handler.

        First yield echoes the user's message with a 'thinking' placeholder
        (and clears the textbox); the second replaces the placeholder with
        the agent's real answer. Each yield produces
        (chat_history, pdf_link, image_update, textbox_value).
        """
        if chat_history is None:
            chat_history = []
        chat_history.append({
            "role": "user",
            "content": user_message
        })
        # Placeholder so the UI feels responsive while the agent runs.
        chat_history.append({
            "role": "assistant",
            "content": "π€ Dox is thinking..."
        })
        yield (
            chat_history,
            None,                                  # no link yet
            gr.update(value=None, visible=False),  # hide image preview
            ""                                     # clear textbox
        )
        # Blocking agent call; may take several seconds.
        response_text, link = ask_agent(user_message)
        chat_history[-1] = {
            "role": "assistant",
            "content": response_text
        }
        yield (
            chat_history,
            link,                                  # feed link_state
            gr.update(value=None, visible=False),  # preview loads in .then()
            ""
        )

    def submit_chain():
        """Return the (fn, inputs, outputs) triple shared by all three
        submit paths (button click, Enter key, example click)."""
        return (
            chat_ui,
            [question, chatbot],
            [chatbot, link_state, output_image, question]
        )

    def show_pdf_link(link):
        # Markdown download link for the full PDF, or nothing.
        if link:
            return f"[π₯ Open Full PDF]({link})"
        return ""

    def clear_chat():
        # Reset chat, link state, preview image, and the feedback widgets.
        return [], None, gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value=None, visible=False)

    clear_btn.click(
        clear_chat,
        outputs=[chatbot, link_state, output_image, feedback_box, submit_feedback_btn]
    )

    def show_feedback_box():
        # Reveal the free-text box and its submit button.
        return gr.update(visible=True), gr.update(visible=True)

    def show_appreciation():
        logger.info("It was helpful!")
        # FIX: literal was corrupted across two lines in the source.
        return "β Feedback submitted. Thank you!"

    thumbs_down.click(
        show_feedback_box,
        outputs=[feedback_box, submit_feedback_btn]
    )
    thumbs_up.click(
        show_appreciation,
        outputs=feedback_status
    )

    def handle_feedback(text):
        # Feedback is only logged server-side; no persistence.
        logger.info(f"User feedback: {text}")
        # FIX: literal was corrupted across two lines in the source.
        return "β Feedback submitted. Thank you!"

    submit_feedback_btn.click(
        handle_feedback,
        inputs=feedback_box,
        outputs=feedback_status
    )

    # Wire the same chat pipeline to example clicks, the Submit button,
    # and the Enter key: chat -> loading status -> PDF render -> PDF link.
    examples.dataset.click(
        *submit_chain()
    ).then(
        prepare_pdf_loading,
        inputs=link_state,
        outputs=pdf_status
    ).then(
        display_pdf,
        inputs=link_state,
        outputs=[output_image, pdf_status]
    ).then(
        show_pdf_link,
        inputs=link_state,
        outputs=pdf_link_btn
    )
    ask_btn.click(
        *submit_chain()
    ).then(
        prepare_pdf_loading,
        inputs=link_state,
        outputs=pdf_status
    ).then(
        display_pdf,
        inputs=link_state,
        outputs=[output_image, pdf_status]
    ).then(
        show_pdf_link,
        inputs=link_state,
        outputs=pdf_link_btn
    )
    question.submit(
        *submit_chain()
    ).then(
        prepare_pdf_loading,
        inputs=link_state,
        outputs=pdf_status
    ).then(
        display_pdf,
        inputs=link_state,
        outputs=[output_image, pdf_status]
    ).then(
        show_pdf_link,
        inputs=link_state,
        outputs=pdf_link_btn
    )
# Run the app only when executed as a script (not when imported).
if __name__ == "__main__":
    # BUG FIX: Blocks.launch() does not accept `theme` or `css` keyword
    # arguments — those are gr.Blocks(...) constructor parameters (note the
    # commented-out theme=/css= lines in the Blocks call above). Passing them
    # here raises a TypeError at startup, so launch with no arguments.
    demo.launch()