# Dox: a Gradio chat app backed by an agno agent that answers data questions
# from a LanceDB knowledge base of DataCamp cheat-sheet PDFs and from
# DuckDuckGo web search.
# Import necessary libraries for logging, system operations, and file handling.
import logging
import sys
import os

# Import core components from the 'agno' library for building the agent.
from agno.agent import Agent
from agno.models.openai import OpenAIChat
from agno.knowledge.embedder.openai import OpenAIEmbedder
from agno.tools.duckduckgo import DuckDuckGoTools
from agno.knowledge.knowledge import Knowledge
from agno.vectordb.lancedb import LanceDb, SearchType

# Import Gradio for creating the web user interface.
import gradio as gr

# Import libraries for handling PDFs and images.
import fitz  # PyMuPDF, used for PDF processing.
from PIL import Image # Pillow library for image manipulation.
import io # Used to handle in-memory binary streams.
import requests # For making HTTP requests to download files.
import re # Regular expressions for searching text patterns.

# Configure basic logging to output messages to the console.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
# Get a logger instance for this script.
logger = logging.getLogger(__name__)

# Retrieve the OpenAI API key from environment variables.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
# If the API key is not found, raise an error.
if not OPENAI_API_KEY:
    raise ValueError("Missing OPENAI_API_KEY")

# Initialize the Knowledge Base for the agent.
knowledge = Knowledge(
    # Use LanceDB as the vector database to store and search document embeddings.
    vector_db=LanceDb(
        uri="tmp/lancedb", # Directory to store the database.
        table_name="pdf_documents", # Name of the table within the database.
        search_type=SearchType.vector, # Use vector search for finding relevant documents.
        # Use OpenAI's embedding model to convert text into numerical vectors.
        embedder=OpenAIEmbedder(id="text-embedding-3-small"),
    )
)

# A list of URLs pointing to PDF documents that will be added to the knowledge base.
pdf_urls = [
    "https://media.datacamp.com/cms/working-with-hugging-face.pdf",
    "https://media.datacamp.com/cms/ai-agents-cheat-sheet.pdf",
    "https://media.datacamp.com/cms/introduction-to-sql-with-ai-1.pdf",
    "https://media.datacamp.com/legacy/image/upload/v1719844709/Marketing/Blog/Azure_CLI_Cheat_Sheet.pdf",
    "https://s3.amazonaws.com/assets.datacamp.com/email/other/Power+BI_Cheat+Sheet.pdf",
    "https://media.datacamp.com/cms/python-basics-cheat-sheet-v4.pdf"
]

# -----------------------------
# Example Questions
# -----------------------------
example_questions=[
    "How do you log into Azure using device code authentication?",
    "What are the three main components of an AI agent?",
    "What are the \"core four\" Hugging Face libraries?",
    "What SQL clause is used to filter data after grouping?",
    "What is the latest GPT model?"
]

# -----------------------------
# Initial Chat State
# -----------------------------
initial_chat = [
    {
        "role": "assistant",
        "content": "Hello, I am Dox, the Data Professional's Guide. Ask me a question about data."
    }
]


# Defines a function to download a file from a URL if it doesn't already exist locally.
def download_if_needed(url, filename):
    """Download ``url`` to ``filename`` unless that path already exists.

    Args:
        url: HTTP(S) address of the file to fetch.
        filename: Local path used as the cache location.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an error status. Nothing is written in that
            case, so a later call retries the download.
    """
    # A cached copy is reused as-is; it is neither re-fetched nor validated.
    if os.path.exists(filename):
        return

    logger.info(f"Downloading {url}...")
    # Bound the request so a stalled server cannot hang startup (same 30 s
    # limit that download_pdf_from_url uses).
    response = requests.get(url, timeout=30)
    # Fail before touching the disk: otherwise an HTML error page would be
    # cached under the .pdf name and never downloaded again.
    response.raise_for_status()
    with open(filename, "wb") as f:
        f.write(response.content)
    logger.info(f"Downloaded {filename} ({len(response.content)} bytes)")

# Create a directory named 'pdf_cache' to store downloaded PDF files.
# 'exist_ok=True' prevents an error if the directory already exists.
os.makedirs("pdf_cache", exist_ok=True)

# Defines a function to add the specified PDFs to the agent's knowledge base.
def add_pdfs_to_knowledge():
    """Download the configured PDFs and load them into the knowledge base.

    Each URL in ``pdf_urls`` is cached as ``pdf_cache/file_<index>.pdf`` and
    queued together with its source URL as metadata. A PDF that cannot be
    downloaded is logged and skipped. The queued PDFs are then ingested with
    whichever API the ``knowledge`` object exposes: ``add_contents`` (batch),
    ``add_content`` (one call per PDF), or a manual read-and-insert fallback.

    Raises:
        Exception: Any error raised while ingesting is logged and re-raised.
    """
    contents_to_add = []

    for i, url in enumerate(pdf_urls):
        filename = f"pdf_cache/file_{i}.pdf"
        try:
            download_if_needed(url, filename)
        except Exception as e:
            # Best effort: one unreachable PDF must not block the others.
            logger.error(f"Failed to prepare PDF {i+1}: {str(e)}")
            continue
        contents_to_add.append({
            "path": filename,
            "metadata": {"source": url},
        })
        logger.info(f"Prepared PDF {i+1}: {url}")

    if not contents_to_add:
        logger.warning("No PDFs were prepared to add")
        return

    try:
        # Pick the ingestion method that this version of the library offers.
        if hasattr(knowledge, 'add_contents'):
            knowledge.add_contents(contents_to_add)
            logger.info(f"✅ Successfully added {len(contents_to_add)} PDFs using add_contents")
        elif hasattr(knowledge, 'add_content'):
            for item in contents_to_add:
                knowledge.add_content(**item)
            logger.info(f"✅ Successfully added {len(contents_to_add)} PDFs using add_content")
        else:
            # Fallback: read the PDFs manually and insert the chunks directly.
            from agno.document.reader.pdf_reader import PDFReader
            reader = PDFReader()
            all_docs = []
            for item in contents_to_add:
                for doc in reader.read(item["path"]):
                    # Copy so each chunk owns its metadata dict instead of
                    # sharing one mutable object with every other chunk.
                    # NOTE(review): confirm the Document attribute is named
                    # `metadata` in the library version this branch targets.
                    doc.metadata = dict(item["metadata"])
                    all_docs.append(doc)
            knowledge.vector_db.insert(documents=all_docs)
            logger.info(f"✅ Successfully added {len(all_docs)} document chunks from {len(contents_to_add)} PDFs")
    except Exception as e:
        logger.error(f"Failed to add PDFs: {str(e)}")
        raise

# Call the function to load the PDFs into the knowledge base.
add_pdfs_to_knowledge()

# Initialize the AI agent with its configuration.
agent = Agent(
    # Set the underlying language model to OpenAI's GPT-4.1-mini with low temperature for more predictable responses.
    model=OpenAIChat(id="gpt-4.1-mini", temperature=0.2),
    # Give the agent a name/description.
    description="You are Dox a data expert!",
    # Provide detailed instructions (the "system prompt") that govern the agent's behavior.
    instructions="""
    You are a data professional's assistant named Dox.
    Your primary goal is to answer questions about data, programming, cloud computing, AI/ML, and technology topics.
    Here are your operating procedures:
    1.  **Information Gathering Strategy**:
        *   **Prioritize Knowledge Base**: First, search your internal knowledge base for the answer.
        *   **Supplement with Web Search**: If the knowledge base information is outdated, insufficient, or the question is better suited for current web information, use the DuckDuckGo tool to perform web searches to fill in gaps or find the most up-to-date data.
        *   For general technology questions not in your knowledge base, use the DuckDuckGo tool to perform web search to provide accurate answers.
        *   If the question is asking for the "latest" or "most recent" of a data-related topic, ALWAYS use the DuckDuckGo tool to perform the latest web search and datetime to context.
        *   If the question is NOT data-related, you MUST respond with: "Please ask relevant data questions only." and terminate.
    2.  **Response Length Guidelines**:
        *   For basic questions, keep your answer to a maximum of 300 words.
        *   For complex questions, extend your answer to a maximum of 500 words.
    3.  **Citation Rules (CRITICAL)**:
        *   **Knowledge Base Citation**: For any information sourced from your internal knowledge base, you MUST ALWAYS include a citation on a NEW LINE after the answer, starting with "Source: ", followed by the metadata field 'source' to get the hyperlink.
        *   **Web Search Citation**: For any information obtained from the web using the DuckDuckGo tool, you MUST ALWAYS include a citation on a NEW LINE after the answer, starting with "Online Source: ", followed by the full hyperlink.
        *   **Final Rule for Citations**: ALWAYS end your answers with the appropriate citations, ensuring they are on separate lines as specified. Do NOT mix or combine citation types on a single line.
        *   ALWAYS cite with links NOT text like "from internal knowledge base"
    4.  **Accuracy and Non-Hallucination**:
        *   Provide factual and relevant answers based ONLY on the information found in your knowledge base or through the DuckDuckGo tool to perform web searches.
        *   NEVER invent or hallucinate information. If an answer cannot be found, state that directly.
    Make sure to follow these instructions precisely.
    """,
    # Link the agent to the knowledge base created earlier.
    knowledge=knowledge,
    # Automatically add the current date and time to the agent's context.
    add_datetime_to_context=True,
    # Automatically add the user's location to the context (if available).
    add_location_to_context=True,
    # Enable the agent to search its knowledge base by default.
    #search_knowledge=True,
    # Equip the agent with tools, in this case, the ability to search the web using DuckDuckGo.
    tools=[DuckDuckGoTools()],
    # Enable markdown formatting in the agent's output.
    markdown=True
)

# Log a success message indicating the agent is ready.
logger.info("🟢 Agent initialized successfully!")

# Defines a function to process a user's question.
def ask_agent(question):
    """Run the agent on ``question`` and extract the first PDF link it cites.

    Args:
        question: The user's question text.

    Returns:
        A ``(text, link)`` tuple. ``text`` is the agent's answer with a
        feedback prompt appended; ``link`` is the first URL ending in
        ``.pdf`` found in the answer, or ``None`` when there is none. If the
        agent call raises, ``text`` is a generic error message and ``link``
        is ``None``.
    """
    logger.info(f"Question asked: {question[:100]}...")
    try:
        response = agent.run(question, use_knowledge=True)
        full_content = response.get_content_as_string()
    except Exception:
        # logger.exception records the traceback, not just the message.
        logger.exception("Agent run failed")
        return "❌ Something went wrong. Please try again.", None

    # Brackets, parentheses and quotes are excluded from the URL body so that
    # a markdown link such as "[https://x/a.pdf](https://x/a.pdf)" yields one
    # clean URL; a bare "[^\s]+" would run across "](" to the last ".pdf".
    match = re.search(r'https?://[^\s<>()\[\]"\']+\.pdf', full_content, re.IGNORECASE)
    link = match.group(0) if match else None

    if link:
        logger.info(f"PDF link found: {link}")
    else:
        logger.info("🔴 No PDF link found in response")

    full_content += "\n\n---\n**📋 Dox would appreciate your feedback! ⬇️**"
    return full_content, link

# Defines a function to download the raw content of a PDF from a URL.
def download_pdf_from_url(url):
    """Fetch ``url`` over HTTP and return the response body as bytes.

    The request is limited to 30 seconds, and an unsuccessful HTTP status
    raises instead of returning the error body.
    """
    resp = requests.get(url, timeout=30)
    resp.raise_for_status()
    return resp.content

# A Gradio helper function to update the UI while a PDF is being prepared for display.
def prepare_pdf_loading(link):
    """Return the status-text update shown before the PDF preview is rendered.

    Args:
        link: PDF URL extracted from the agent's answer, or a falsy value.

    Returns:
        A Gradio update that makes the status component visible, with a
        loading message when ``link`` is set and a "no PDF" notice otherwise.
    """
    if link:
        return gr.update(value="📄 Loading PDF preview...", visible=True)
    # The notice is shown, not hidden; display_pdf, which runs next in the
    # submit chain, hides the status again when there is no link.
    return gr.update(value="❌ No PDF for preview", visible=True)

# Defines a function to display the first page of a PDF as an image.
def display_pdf(pdf_url):
    """Render the first page of the PDF at ``pdf_url`` as a preview image.

    Args:
        pdf_url: URL of the PDF to preview, or a falsy value for "no PDF".

    Returns:
        A pair of Gradio updates ``(image_update, status_update)``:
        both hidden when ``pdf_url`` is falsy; the rendered page shown and
        the status hidden on success; the image hidden and a failure message
        shown if downloading or rendering raises.
    """
    if not pdf_url:
        return (
            gr.update(value=None, visible=False),
            gr.update(value="", visible=False)
        )
    try:
        pdf_bytes = download_pdf_from_url(pdf_url)
        # The context manager closes the document even when rendering fails
        # (for example a PDF with no pages makes doc[0] raise).
        with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
            page = doc[0]
            # Scale the page 1.5x in both directions for a sharper preview.
            zoom = 1.5
            pix = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom))
            png_bytes = pix.tobytes("png")
        img = Image.open(io.BytesIO(png_bytes))
    except Exception as e:
        logger.error(f"PDF error: {e}")
        return (
            gr.update(value=None, visible=False),
            gr.update(value="❌ Failed to load PDF", visible=True)
        )
    return (
        gr.update(value=img, visible=True),
        gr.update(value="", visible=False)
    )

theme = gr.themes.Ocean(
    font=[gr.themes.GoogleFont("Inter"), "Segoe UI", "sans-serif"],
    font_mono=[gr.themes.GoogleFont("Fira Code"), "monospace"]
)
DOX_LOGO = "https://raw.githubusercontent.com/AzzamAlnatsheh/DDS_BuildingAIChallenge/main/Dox Transparent Image.png"
custom_css = """
.main-container {
    width: 100%;
    max-width: 100%;
    margin: 0;
    padding: 0 10px;
}

/* HEADER */
.header-card {
    background: linear-gradient(135deg, #0B1F3A, #102A4C);
    border-radius: 24px;
    padding: 0px;
    box-shadow: 0 16px 40px rgba(2, 6, 23, 0.4);
    border: 1px solid #1F3B5C;
    margin: 0px;
}

/* SIDEBAR */
.sidebar-card {
    background: rgba(11, 31, 58, 0.95);
    border-radius: 24px;
    padding: 24px;
    box-shadow: 0 16px 40px rgba(2, 6, 23, 0.35);
    border: 1px solid #1F3B5C;
    height: 100%;
}

/* CHAT */
.chat-card {
    background: rgba(16, 42, 76, 0.95);
    border-radius: 24px;
    padding: 22px;
    box-shadow: 0 16px 40px rgba(2, 6, 23, 0.35);
    border: 1px solid #1F3B5C;
}

/* LOGO */
.logo-img {
    width: 200px;
    height: 170px;
    margin: 0px;
    padding: 0px;
}

/* TITLE */
.title-text {
    font-size: 32px;
    font-weight: 850;
    color: #E6F0FA;
    margin-bottom: 8px;
    letter-spacing: -0.03em;
}

/* SUBTITLE */
.subtitle-text {
    font-size: 16px;
    color: #AFCBE8;
    line-height: 1.65;
    max-width: auto;
}

/* BADGES */
.badge {
    display: inline-block;
    background: rgba(30, 167, 255, 0.15);
    color: #4FD1FF;
    padding: 7px 13px;
    border-radius: 999px;
    font-size: 13px;
    font-weight: 650;
    margin-right: 7px;
    margin-bottom: 8px;
    border: 1px solid rgba(30, 167, 255, 0.3);
}

/* STATUS BOX */
.status-box {
    background: rgba(16, 42, 76, 0.85);
    border: 1px solid #1F3B5C;
    padding: 14px;
    border-radius: 16px;
    font-size: 14px;
    color: #CFE6FF;
    line-height: 1.6;
}

/* NOTES */
.small-note {
    font-size: 13px;
    color: #9FB9D9;
    line-height: 1.55;
}

.footer-note {
    font-size: 13px;
    color: #9FB9D9;
    text-align: center;
    margin-top: 18px;
}

/* CHATBOT BOX */
#chatbot {
    min-height: 540px;
    border-radius: 18px;
    border: 1px solid #1F3B5C;
    background: rgba(11, 31, 58, 0.9);
}

/* INPUT */
#question_box textarea {
    border-radius: 16px !important;
    background: #0B1F3A;
    color: #E6F0FA;
    border: 1px solid #1F3B5C;
}

/* EXAMPLES */
.example-button {
    margin-bottom: 8px !important;
    border-radius: 14px !important;
    white-space: normal !important;
    text-align: left !important;
    background: rgba(30, 167, 255, 0.08);
    color: #CFE6FF;
    border: 1px solid rgba(30, 167, 255, 0.2);
}

/* PRIMARY BUTTON */
.primary-action {
    border-radius: 14px !important;
    background: linear-gradient(135deg, #1EA7FF, #4FD1FF) !important;
    color: #0B1F3A !important;
    border: none !important;
    font-weight: 700;
}

/* CLEAR BUTTON */
.clear-action {
    border-radius: 14px !important;
    background: rgba(255, 255, 255, 0.05) !important;
    color: #E6F0FA !important;
    border: 1px solid #1F3B5C !important;
}
"""

# Create the Gradio interface using `gr.Blocks` for a custom layout.
with gr.Blocks(title="πŸ€– Dox the Data Professional's Guide πŸ€–",
    #theme=theme,
    #css=demo_css,
    fill_width=True
) as demo:
    def run_example(question_text, chat_history):
        """Stream a chat turn for an example question by delegating to ``chat_ui``.

        Written as a generator function (``yield from``) rather than returning
        the generator object, so that callers which inspect the function see
        it as streaming. It yields exactly what ``chat_ui`` yields.

        NOTE(review): ``chat_ui`` yields four values while ``gr.Examples`` is
        given two outputs; that only matters if the examples are ever set to
        run this function (caching or run-on-click) - confirm before enabling.
        """
        yield from chat_ui(question_text, chat_history)
    # Page layout: a header row, then a two-column row with the chat on the
    # left and the PDF preview on the right.
    with gr.Column(elem_classes=["main-container"]):

        # -----------------------------
        # Header
        # -----------------------------
        with gr.Row(elem_classes=["header-card"]):
            with gr.Column(scale=1):
                gr.HTML(
                    f"""
                    <img src="{DOX_LOGO}" class="logo-img" alt="DOX Logo">
                    """
                )

            with gr.Column(scale=5):
                gr.HTML(
                    """
                    <div class="title-text">🤖 Dox the Data Professional's Advisor 🤖</div>
                    <div class="subtitle-text">
                        A professional data chatbot that aims in reminding and helping data experts in certain concepts in a simplified way 
                        while also having access to download DataCamp's public cheatsheets on many data-related topics.
                    </div>
                    <br>
                    <span class="badge">Agno</span>
                    <span class="badge">LanceDB</span>
                    <span class="badge">OpenAI</span>
                    <span class="badge">Gradio</span>
                    """
                )
        # -----------------------------
        # Two Column Layout
        # -----------------------------
        with gr.Row():

            # Left column: chat area.
            with gr.Column(scale=3, elem_classes=["chat-card"]):
                gr.Markdown("### 🧠 Dox is an expert in the following topics: \n1️⃣ Hugging Face | 2️⃣ AI Agents | 3️⃣ SQL with AI | 4️⃣ Azure CLI | 5️⃣ Power BI | 6️⃣ Python")

                chatbot = gr.Chatbot(
                    label="💬 Conversation",
                    elem_id="chatbot",
                    value=initial_chat.copy(),
                    height=450
                )

                question = gr.Textbox(
                    label="🙋 Ask Dox a question:",
                    placeholder="🤔 Type your question here...",
                    lines=1,
                    # An id, not a class: custom_css styles "#question_box textarea".
                    elem_id="question_box"
                )

                with gr.Row():
                    ask_btn = gr.Button("Submit 📤", variant="primary", elem_classes="primary-action")
                    clear_btn = gr.Button("🧹 Clear Chat", elem_classes="clear-action")
                # A section for example questions.
                gr.Markdown("### 💡 Example Questions", elem_classes="example-button")
                examples = gr.Examples(
                    examples=example_questions,
                    inputs=question,
                    outputs=[chatbot, question],
                    fn=run_example,
                    cache_examples=False
                )

                # 👍👎 Feedback buttons
                with gr.Row():
                    thumbs_up = gr.Button("👍 Helpful", elem_classes="clear-action")
                    thumbs_down = gr.Button("👎 Not Helpful", elem_classes="clear-action")

                # Hidden feedback box (only appears on 👎)
                feedback_box = gr.Textbox(
                    placeholder="💬 Optional: tell us what went wrong...",
                    visible=False
                )

                submit_feedback_btn = gr.Button("📝 Submit Feedback", visible=False, elem_classes="clear-action")
                feedback_status = gr.Markdown("", elem_classes="clear-action")
            # Right column: PDF preview.
            with gr.Column(scale=3):
                gr.Markdown("### 📄 Referenced PDF Document (🌐 Empty for Web Results)", elem_classes="clear-action")
                # A hidden state to store the PDF link found in the agent's response.
                link_state = gr.State()
                # A markdown component to show PDF loading status.
                pdf_status = gr.Markdown(visible=False, elem_classes="clear-action")
                # An image component to display the PDF preview.
                output_image = gr.Image(
                    label="⬇️ Cheat Sheet Preview",
                    visible=False
                )
                # Markdown slot filled with an "open full PDF" link by show_pdf_link.
                pdf_link_btn = gr.Markdown("")

    # Defines the main chat logic as a generator function for streaming output.
    def chat_ui(user_message, chat_history):
        """Stream one chat turn: a placeholder first, then the agent's answer.

        Args:
            user_message: Text taken from the question box.
            chat_history: Current chatbot messages as a list of
                ``{"role", "content"}`` dicts, or ``None``.

        Yields:
            ``(chat_history, link, image_update, textbox_value)`` tuples, in
            the order of the outputs listed by ``submit_chain``: the chatbot
            messages, the PDF link (or ``None``), an update that hides the
            preview image, and ``""`` to clear the question box.
        """
        if chat_history is None:
            chat_history = []

        # A blank submission is not worth an agent call: leave the
        # conversation as it is and just reset the link, preview and textbox.
        if not user_message or not user_message.strip():
            yield (
                chat_history,
                None,
                gr.update(value=None, visible=False),
                ""
            )
            return

        chat_history.append({
            "role": "user",
            "content": user_message
        })

        # Temporary placeholder, replaced below once the agent has answered.
        chat_history.append({
            "role": "assistant",
            "content": "🤔 Dox is thinking..."
        })

        # First update: show the user's message and the placeholder at once.
        yield (
            chat_history,
            None,  # No link yet.
            gr.update(value=None, visible=False),  # Hide image preview.
            ""  # Clear textbox.
        )

        response_text, link = ask_agent(user_message)

        # Swap the placeholder for the real answer.
        chat_history[-1] = {
            "role": "assistant",
            "content": response_text
        }

        # Second update: final answer plus the extracted link for link_state.
        yield (
            chat_history,
            link,
            gr.update(value=None, visible=False),  # Preview is rendered by a later step.
            ""  # Keep textbox clear.
        )
        
    # This is a helper function to avoid repeating the event handler chain.
    def submit_chain():
        """Return the ``(fn, inputs, outputs)`` triple used by every submit trigger.

        ``chat_ui`` reads the question textbox and the chatbot history, and
        writes the chatbot history, the link state, the preview image and the
        (cleared) question textbox.
        """
        handler = chat_ui
        handler_inputs = [question, chatbot]
        handler_outputs = [chatbot, link_state, output_image, question]
        return handler, handler_inputs, handler_outputs

    def show_pdf_link(link):
        """Return a markdown link to the full PDF, or ``""`` when there is no link.

        Args:
            link: URL of the referenced PDF, or a falsy value.
        """
        if not link:
            return ""
        return f"[📥 Open Full PDF]({link})"

    def clear_chat():
        """Reset the conversation and hide the preview and feedback widgets.

        Returns:
            Five values in the order wired to ``clear_btn.click``: an empty
            chat history, ``None`` for the link state, and updates that hide
            the preview image, the feedback textbox and the feedback button.
        """
        return (
            [],
            None,
            gr.update(value=None, visible=False),
            # Empty the textbox so old feedback text does not reappear.
            gr.update(value="", visible=False),
            # Only hide the button. NOTE(review): a Button's value is its
            # caption, so sending value=None here would presumably blank
            # "Submit Feedback" the next time it is shown - confirm.
            gr.update(visible=False),
        )

    clear_btn.click(
        clear_chat,
        outputs=[chatbot, link_state, output_image, feedback_box, submit_feedback_btn]
    )

    def show_feedback_box():
        """Return updates that reveal the feedback textbox and its submit button."""
        reveal_box = gr.update(visible=True)
        reveal_button = gr.update(visible=True)
        return reveal_box, reveal_button

    def show_appreciation():
        """Log a thumbs-up and return the confirmation text for the status area."""
        logger.info("It was helpful!")
        return "✅ Feedback submitted. Thank you!"
     
    thumbs_down.click(
        show_feedback_box,
        outputs=[feedback_box, submit_feedback_btn]
    )

    thumbs_up.click(
        show_appreciation,
        outputs=feedback_status
    )

    def handle_feedback(text):
        """Log the user's written feedback and return the confirmation text.

        Args:
            text: Contents of the feedback textbox.
        """
        logger.info(f"User feedback: {text}")
        return "✅ Feedback submitted. Thank you!"
     
    submit_feedback_btn.click(
        handle_feedback,
        inputs=feedback_box,
        outputs=feedback_status
    )

    # Every way of submitting a question - clicking an example, pressing the
    # Submit button, or hitting Enter in the textbox - runs the same chain,
    # registered in that order:
    #   1. chat_ui streams the answer and stores the PDF link in link_state,
    #   2. prepare_pdf_loading shows the status text,
    #   3. display_pdf renders the first page and updates the status,
    #   4. show_pdf_link fills in the "open full PDF" link.
    for trigger in (examples.dataset.click, ask_btn.click, question.submit):
        trigger(
            *submit_chain()
        ).then(
            prepare_pdf_loading,
            inputs=link_state,
            outputs=pdf_status
        ).then(
            display_pdf,
            inputs=link_state,
            outputs=[output_image, pdf_status]
        ).then(
            show_pdf_link,
            inputs=link_state,
            outputs=pdf_link_btn
        )

# This block ensures the code inside only runs when the script is executed directly.
if __name__ == "__main__":
    # Start the Gradio app with an Ocean theme (indigo/slate hues) and the
    # stylesheet defined in custom_css.
    # NOTE(review): theme and css are passed to launch() here rather than to
    # gr.Blocks(); confirm the installed Gradio version accepts them at launch.
    # The module-level `theme` object is not used; this builds its own theme.
    demo.launch(
        theme=gr.themes.Ocean(
            primary_hue="indigo",
            neutral_hue="slate"
        ),
        css=custom_css
    )