"""
Gradio admin interface for content management
Allows uploading documents, scraping URLs, and managing content
"""
import gradio as gr
import os
import html as html_lib
from dotenv import load_dotenv
from qdrant_client import QdrantClient, models
from src.ingestion import ingest_document
from src.scraper import process_and_store_webpage
from src.config import load_settings, save_settings
# Load environment variables (.env) before any os.getenv() lookups below.
load_dotenv()

# Initialize Qdrant client from environment configuration.
client = QdrantClient(
    url=os.getenv("QDRANT_URL"),
    api_key=os.getenv("QDRANT_API_KEY"),
)
collection_name = os.getenv("QDRANT_COLLECTION", "hr-intervals")

# Create a keyword index on metadata.source so filtering by source
# (used when deleting a document) is supported.
try:
    client.create_payload_index(
        collection_name=collection_name,
        field_name="metadata.source",
        field_schema=models.PayloadSchemaType.KEYWORD,
    )
except Exception as e:
    # Best effort: the index might already exist or the collection may not
    # be found; report the status and keep starting the admin UI.
    print(f"ℹ️ Index status: {e}")
else:
    print("✅ Payload index for metadata.source created successfully")
# ==================== Functions ====================
def _collect_document_summaries():
    """Scroll the whole collection and return one summary dict per source."""
    summaries = {}
    offset = None
    while True:
        points, offset = client.scroll(
            collection_name=collection_name,
            limit=1000,
            offset=offset,
            with_payload=True,
        )
        # Aggregate page by page instead of keeping every point in memory.
        for point in points:
            # Payload (or its nested metadata) can be missing on a point.
            metadata = (point.payload or {}).get("metadata") or {}
            source = metadata.get("source", "Unknown")
            if source not in summaries:
                summaries[source] = {
                    "name": source,
                    "type": metadata.get("type", "Unknown"),
                    "date": metadata.get("upload_date", "Unknown"),
                    "chunks": 0,
                }
            summaries[source]["chunks"] += 1
        # scroll() returns None as the next offset once the last page is read.
        if offset is None:
            return summaries


def _render_documents_table(summaries):
    """Render the per-source summaries as an HTML table with escaped cells."""
    rows = "".join(
        "<tr>"
        # str() first: html.escape only accepts strings, metadata may not be.
        f"<td>{html_lib.escape(str(doc['name']))}</td>"
        f"<td>{html_lib.escape(str(doc['type']))}</td>"
        f"<td>{html_lib.escape(str(doc['date']))}</td>"
        f"<td>{doc['chunks']}</td>"
        "</tr>"
        for doc in summaries.values()
    )
    return (
        '<table style="width:100%; border-collapse:collapse; user-select:text;">'
        "<thead><tr>"
        "<th>Document Name</th><th>Type</th><th>Upload Date</th><th>Chunks</th>"
        "</tr></thead>"
        f"<tbody>{rows}</tbody>"
        "</table>"
    )


def list_all_documents():
    """
    List all uploaded documents

    Pages through every point in the collection, groups the points by
    ``metadata.source`` and counts the chunks per source. The type and
    upload date shown are those of the first point seen for each source.

    Returns:
        HTML string: a table with selectable content, a short notice when
        no documents are found, or an error message if loading failed.
        This function does not raise; failures are reported in the HTML.
    """
    try:
        summaries = _collect_document_summaries()
        # A lone "Unknown" group means no point carried a source: treat it
        # the same as an empty knowledge base.
        if not summaries or (len(summaries) == 1 and "Unknown" in summaries):
            return (
                '<div style="padding:12px; user-select:text;">'
                "No documents found in the knowledge base."
                "</div>"
            )
        return _render_documents_table(summaries)
    except Exception as e:
        # Surface the failure to the admin instead of rendering nothing.
        return (
            '<div style="padding:12px; user-select:text;">'
            f"❌ Error loading documents: {html_lib.escape(str(e))}"
            "</div>"
        )
def upload_document(file, doc_type="document"):
    """
    Upload PDF or DOCX file

    Args:
        file: Uploaded file from the Gradio File component; either an
            object exposing the path as ``.name`` or the path string itself
        doc_type: Type of document

    Returns:
        Success message with file name, chunk count and type, or an error
        message if no file was given or ingestion failed
    """
    if not file:
        return "❌ Please select a file"
    try:
        # Accept both a wrapper object with .name and a plain path string.
        file_path = getattr(file, "name", file)
        # Ingest document
        num_chunks = ingest_document(file_path, doc_type)
        return (
            f"✅ Success!\n\nFile: {os.path.basename(file_path)}\n"
            f"Chunks created: {num_chunks}\nType: {doc_type}"
        )
    except Exception as e:
        return f"❌ Upload failed:\n{e}"
def scrape_single_url(url):
    """
    Scrape single URL

    Args:
        url: URL to scrape; surrounding whitespace is ignored

    Returns:
        Success message with the chunk count, or an error message if the
        URL is empty or scraping failed
    """
    # Normalize first so None and whitespace-only input are rejected too,
    # matching how the batch scraper strips each line.
    url = (url or "").strip()
    if not url:
        return "❌ Please enter a URL"
    try:
        num_chunks = process_and_store_webpage(url)
    except Exception as e:
        return f"❌ Scraping failed:\n{e}"
    return f"✅ Success!\n\nURL: {url}\nChunks created: {num_chunks}"
def scrape_multiple_urls(urls_text):
    """
    Scrape multiple URLs

    Args:
        urls_text: URLs separated by newlines; blank lines and surrounding
            whitespace are ignored, repeated URLs are scraped once

    Returns:
        Summary of results: a success/failure count followed by one line
        per URL, or an error message if no URL was entered
    """
    stripped = (line.strip() for line in (urls_text or "").splitlines())
    # dict.fromkeys drops duplicates while keeping the entered order, so a
    # repeated line does not ingest the same page twice.
    urls = list(dict.fromkeys(url for url in stripped if url))
    # Checked after parsing so whitespace-only input is rejected as well.
    if not urls:
        return "❌ Please enter URLs (one per line)"

    results = []
    success_count = 0
    for url in urls:
        try:
            num_chunks = process_and_store_webpage(url)
        except Exception as e:
            # One failing URL must not abort the rest of the batch.
            results.append(f"❌ {url}: {e}")
        else:
            results.append(f"✅ {url}: {num_chunks} chunks")
            success_count += 1
    fail_count = len(urls) - success_count

    summary = f"📊 Summary: {success_count} succeeded, {fail_count} failed\n\n"
    return summary + "\n".join(results)
def delete_document(source_name):
    """
    Delete document by source name

    Removes every point whose ``metadata.source`` equals the given name.

    Args:
        source_name: Name or URL of the source (exact match; surrounding
            whitespace is ignored)

    Returns:
        Success message, a notice when nothing matches the source, or an
        error message if the input is empty or deletion failed
    """
    source_name = (source_name or "").strip()
    if not source_name:
        return "❌ Please enter document name or URL"
    try:
        source_filter = models.Filter(
            must=[
                models.FieldCondition(
                    key="metadata.source",
                    match=models.MatchValue(value=source_name),
                )
            ]
        )
        # Count first: a filter delete succeeds even when nothing matches,
        # which would report a typo in the name as a successful deletion.
        matched = client.count(
            collection_name=collection_name,
            count_filter=source_filter,
            exact=True,
        ).count
        if matched == 0:
            return f"⚠️ No content found for:\n{source_name}\n\nNothing was deleted."
        client.delete(
            collection_name=collection_name,
            points_selector=models.FilterSelector(filter=source_filter),
        )
    except Exception as e:
        return f"❌ Deletion failed:\n{e}"
    return f"✅ Successfully deleted all content from:\n{source_name}"
def get_current_settings():
    """Read the stored settings and return them as a tuple of UI values.

    Returns:
        Tuple of (disclaimer, welcome_message, bot_avatar_url,
        primary_color, secondary_color, font_family), in that order.
    """
    settings = load_settings()
    # Order must match the outputs list wired to this function in the UI.
    ui_field_order = (
        "disclaimer",
        "welcome_message",
        "bot_avatar_url",
        "primary_color",
        "secondary_color",
        "font_family",
    )
    return tuple(settings[field] for field in ui_field_order)
def save_chatbot_settings(disclaimer, welcome_message, bot_avatar_url, primary_color, secondary_color, font_family):
    """
    Save chatbot settings to config file.

    Loads the current settings, overwrites the six UI-editable fields and
    saves the result, so any other stored keys are kept.

    Args:
        disclaimer: Disclaimer text
        welcome_message: Welcome message text
        bot_avatar_url: URL of the bot avatar image
        primary_color: Primary color value
        secondary_color: Secondary color value
        font_family: Font family value

    Returns:
        Success message, or an error message if loading or saving failed
    """
    try:
        settings = load_settings()
        settings["disclaimer"] = disclaimer
        settings["welcome_message"] = welcome_message
        settings["bot_avatar_url"] = bot_avatar_url
        settings["primary_color"] = primary_color
        settings["secondary_color"] = secondary_color
        settings["font_family"] = font_family
        save_settings(settings)
    except Exception as e:
        return f"❌ Failed to save: {e}"
    return "✅ Settings saved! Restart the chatbot space for changes to take effect."
# ==================== Gradio Interface (5.49) ====================
# Admin panel layout: a single Blocks app with one tab per management task.
# Each tab wires its button to one of the handler functions defined above.
# NOTE(review): the emoji prefixes in the labels below look mis-encoded
# (mojibake) — confirm the file is saved and read as UTF-8.
with gr.Blocks(
    title="HR Intervals - Admin Panel",
    theme=gr.themes.Soft()
) as demo:
    gr.Markdown("# đ HR Intervals - Knowledge Base Management")
    gr.Markdown("Manage documents and web content for the AI assistant")
    with gr.Tabs():
        # Tab 1: View Documents
        with gr.Tab("đ View Documents"):
            gr.Markdown("### Current documents in knowledge base")
            gr.Markdown("đĄ *Tip: You can select and copy any text from the table below*")
            refresh_btn = gr.Button("đ Refresh List", variant="primary")
            # Rendered as HTML because list_all_documents returns an HTML string.
            docs_table = gr.HTML(
                label="Documents"
            )
            refresh_btn.click(list_all_documents, outputs=docs_table)
            # Also fill the table via the app's load event, not only on refresh.
            demo.load(list_all_documents, outputs=docs_table)
        # Tab 2: Upload Documents
        with gr.Tab("âŦī¸ Upload Documents"):
            gr.Markdown("### Upload PDF or DOCX files")
            file_input = gr.File(
                label="Select File (PDF or DOCX)",
                file_types=[".pdf", ".docx"]
            )
            doc_type_input = gr.Radio(
                choices=["document", "policy", "guide", "article"],
                value="document",
                label="Document Type"
            )
            upload_btn = gr.Button("đ¤ Upload", variant="primary", size="lg")
            upload_output = gr.Textbox(label="Upload Result", lines=5)
            upload_btn.click(
                upload_document,
                inputs=[file_input, doc_type_input],
                outputs=upload_output
            )
        # Tab 3: Scrape URLs (single URL on the left, batch on the right)
        with gr.Tab("đ Scrape Web Pages"):
            gr.Markdown("### Scrape content from URLs")
            with gr.Row():
                with gr.Column():
                    gr.Markdown("#### Single URL")
                    url_input = gr.Textbox(
                        label="Enter URL",
                        placeholder="https://example.com/article"
                    )
                    scrape_btn = gr.Button("đ Scrape", variant="primary")
                    scrape_output = gr.Textbox(label="Result", lines=4)
                    scrape_btn.click(
                        scrape_single_url,
                        inputs=url_input,
                        outputs=scrape_output
                    )
                with gr.Column():
                    gr.Markdown("#### Batch URLs")
                    urls_input = gr.Textbox(
                        label="Enter multiple URLs (one per line)",
                        placeholder="https://example.com/page1\nhttps://example.com/page2",
                        lines=6
                    )
                    batch_btn = gr.Button("đ Batch Scrape", variant="primary")
                    batch_output = gr.Textbox(label="Batch Results", lines=8)
                    batch_btn.click(
                        scrape_multiple_urls,
                        inputs=urls_input,
                        outputs=batch_output
                    )
        # Tab 4: Delete Documents
        with gr.Tab("đī¸ Delete Documents"):
            gr.Markdown("### Delete documents or web pages")
            gr.Markdown("â ī¸ **Warning**: This operation cannot be undone!")
            # The value is matched exactly against metadata.source by delete_document.
            delete_input = gr.Textbox(
                label="Document Name or URL",
                placeholder="e.g., hiring_policy.pdf or https://example.com/article"
            )
            delete_btn = gr.Button("đī¸ Delete", variant="stop", size="lg")
            delete_output = gr.Textbox(label="Delete Result", lines=3)
            delete_btn.click(
                delete_document,
                inputs=delete_input,
                outputs=delete_output
            )
        # Tab 5: Chatbot Settings
        with gr.Tab("âī¸ Chatbot Settings"):
            gr.Markdown("### Chatbot Appearance & Text Settings")
            gr.Markdown("Changes take effect after the chatbot space restarts.")
            with gr.Row():
                with gr.Column():
                    setting_primary_color = gr.ColorPicker(label="Primary Color (buttons, links)")
                    setting_secondary_color = gr.ColorPicker(label="Secondary Color (background)")
                    setting_font = gr.Textbox(label="Font Family", placeholder="Arial, sans-serif")
                    setting_avatar = gr.Textbox(label="Bot Avatar Image URL", placeholder="https://...")
                with gr.Column():
                    setting_disclaimer = gr.Textbox(label="Disclaimer Text (Markdown)", lines=6)
                    setting_welcome = gr.Textbox(label="Welcome Message (Markdown)", lines=8)
            save_settings_btn = gr.Button("đž Save Settings", variant="primary", size="lg")
            settings_output = gr.Textbox(label="Result", lines=2)
            # Input order must match the parameter order of save_chatbot_settings.
            save_settings_btn.click(
                save_chatbot_settings,
                inputs=[setting_disclaimer, setting_welcome, setting_avatar,
                        setting_primary_color, setting_secondary_color, setting_font],
                outputs=settings_output
            )
            # Output order must match the tuple returned by get_current_settings.
            demo.load(
                get_current_settings,
                outputs=[setting_disclaimer, setting_welcome, setting_avatar,
                         setting_primary_color, setting_secondary_color, setting_font]
            )
        # Tab 6: Help (static usage guide, no event handlers)
        with gr.Tab("âšī¸ Help"):
            gr.Markdown("""
### Usage Guide
#### đ View Documents
- Shows all uploaded documents and web pages
- Displays document type, upload date, and number of chunks
- Click "Refresh" to see the latest status
#### âŦī¸ Upload Documents
- Supports PDF and DOCX formats
- Documents are automatically split into chunks (~1000 characters each)
- You can categorize documents by type
#### đ Scrape Web Pages
- Enter full URLs (including https://)
- Supports single or batch scraping
- Content is automatically converted to Markdown format
#### đī¸ Delete Documents
- Enter exact filename or URL
- Deletes all chunks from that source
- **Warning**: Cannot be undone!
- **Tip**: To update a document, delete it first then upload the new version
---
### Advanced Management
For detailed vector database management, visit:
[Qdrant Cloud Dashboard](https://cloud.qdrant.io)
### Technical Support
If you encounter issues, please contact the development team.
""")
if __name__ == "__main__":
    # Local import: only needed when the panel is started as a script.
    import secrets

    admin_user = os.getenv("ADMIN_USERNAME", "admin")
    admin_pass = os.getenv("ADMIN_PASSWORD")
    if not admin_pass:
        # Never fall back to a password that is readable in the source code:
        # the server listens on all interfaces. Generate a one-time password
        # instead and show it once on the console.
        admin_pass = secrets.token_urlsafe(16)
        print(
            "WARNING: ADMIN_PASSWORD is not set; generated a one-time password "
            f"for user '{admin_user}': {admin_pass}"
        )
    demo.launch(
        server_name="0.0.0.0",
        server_port=7861,
        share=False,
        auth=(admin_user, admin_pass),
    )