""" Gradio admin interface for content management Allows uploading documents, scraping URLs, and managing content """ import gradio as gr import os import html as html_lib from dotenv import load_dotenv from qdrant_client import QdrantClient, models from src.ingestion import ingest_document from src.scraper import process_and_store_webpage from src.config import load_settings, save_settings load_dotenv() # Initialize Qdrant client client = QdrantClient( url=os.getenv("QDRANT_URL"), api_key=os.getenv("QDRANT_API_KEY") ) collection_name = os.getenv("QDRANT_COLLECTION", "hr-intervals") # Create index for metadata.source to enable filtering try: client.create_payload_index( collection_name=collection_name, field_name="metadata.source", field_schema=models.PayloadSchemaType.KEYWORD ) print("✅ Payload index for metadata.source created successfully") except Exception as e: # Index might already exist or collection not found print(f"â„šī¸ Index status: {str(e)}") # ==================== Functions ==================== def list_all_documents(): """ List all uploaded documents Returns: HTML table string with selectable content """ try: # Paginate through ALL points (Qdrant has 5800+ points) all_points = [] offset = None while True: result = client.scroll( collection_name=collection_name, limit=1000, offset=offset, with_payload=True ) points, next_offset = result all_points.extend(points) if next_offset is None: break offset = next_offset # Group by source docs_dict = {} for point in all_points: payload = point.payload # Metadata is nested inside payload metadata = payload.get("metadata", {}) source = metadata.get("source", "Unknown") if source not in docs_dict: docs_dict[source] = { "name": source, "type": metadata.get("type", "Unknown"), "date": metadata.get("upload_date", "Unknown"), "chunks": 0 } docs_dict[source]["chunks"] += 1 # Create HTML table with selectable text if not docs_dict or (len(docs_dict) == 1 and "Unknown" in docs_dict): return """

📂 No documents yet

""" html = """ """ for doc in docs_dict.values(): html += f""" """ html += """
Document Name Type Upload Date Chunks
{html_lib.escape(doc['name'])} {html_lib.escape(doc['type'])} {html_lib.escape(doc['date'])} {doc['chunks']}
""" return html except Exception as e: return f"""

❌ Error: {str(e)}

""" def upload_document(file, doc_type="document"): """ Upload PDF or DOCX file Args: file: Uploaded file object doc_type: Type of document Returns: Success message """ if file is None: return "❌ Please select a file" try: file_path = file.name # Ingest document num_chunks = ingest_document(file_path, doc_type) return f"✅ Success!\n\nFile: {os.path.basename(file_path)}\nChunks created: {num_chunks}\nType: {doc_type}" except Exception as e: return f"❌ Upload failed:\n{str(e)}" def scrape_single_url(url): """ Scrape single URL Args: url: URL to scrape Returns: Success message """ if not url: return "❌ Please enter a URL" try: num_chunks = process_and_store_webpage(url) return f"✅ Success!\n\nURL: {url}\nChunks created: {num_chunks}" except Exception as e: return f"❌ Scraping failed:\n{str(e)}" def scrape_multiple_urls(urls_text): """ Scrape multiple URLs Args: urls_text: URLs separated by newlines Returns: Summary of results """ if not urls_text: return "❌ Please enter URLs (one per line)" urls = [url.strip() for url in urls_text.split('\n') if url.strip()] results = [] success_count = 0 fail_count = 0 for url in urls: try: num_chunks = process_and_store_webpage(url) results.append(f"✅ {url}: {num_chunks} chunks") success_count += 1 except Exception as e: results.append(f"❌ {url}: {str(e)}") fail_count += 1 summary = f"📊 Summary: {success_count} succeeded, {fail_count} failed\n\n" return summary + "\n".join(results) def delete_document(source_name): """ Delete document by source name Args: source_name: Name or URL of the source Returns: Success message """ if not source_name: return "❌ Please enter document name or URL" try: client.delete( collection_name=collection_name, points_selector=models.FilterSelector( filter=models.Filter( must=[ models.FieldCondition( key="metadata.source", match=models.MatchValue(value=source_name) ) ] ) ) ) return f"✅ Successfully deleted all content from:\n{source_name}" except Exception as e: return f"❌ Deletion failed:\n{str(e)}" def get_current_settings(): """Load current settings and return as individual values for the UI.""" s = load_settings() return ( s["disclaimer"], s["welcome_message"], s["bot_avatar_url"], s["primary_color"], s["secondary_color"], s["font_family"], ) def save_chatbot_settings(disclaimer, welcome_message, bot_avatar_url, primary_color, secondary_color, font_family): """Save chatbot settings to config file.""" try: s = load_settings() s["disclaimer"] = disclaimer s["welcome_message"] = welcome_message s["bot_avatar_url"] = bot_avatar_url s["primary_color"] = primary_color s["secondary_color"] = secondary_color s["font_family"] = font_family save_settings(s) return "✅ Settings saved! Restart the chatbot space for changes to take effect." except Exception as e: return f"❌ Failed to save: {str(e)}" # ==================== Gradio Interface (5.49) ==================== with gr.Blocks( title="HR Intervals - Admin Panel", theme=gr.themes.Soft() ) as demo: gr.Markdown("# 📁 HR Intervals - Knowledge Base Management") gr.Markdown("Manage documents and web content for the AI assistant") with gr.Tabs(): # Tab 1: View Documents with gr.Tab("📋 View Documents"): gr.Markdown("### Current documents in knowledge base") gr.Markdown("💡 *Tip: You can select and copy any text from the table below*") refresh_btn = gr.Button("🔄 Refresh List", variant="primary") docs_table = gr.HTML( label="Documents" ) refresh_btn.click(list_all_documents, outputs=docs_table) demo.load(list_all_documents, outputs=docs_table) # Tab 2: Upload Documents with gr.Tab("âŦ†ī¸ Upload Documents"): gr.Markdown("### Upload PDF or DOCX files") file_input = gr.File( label="Select File (PDF or DOCX)", file_types=[".pdf", ".docx"] ) doc_type_input = gr.Radio( choices=["document", "policy", "guide", "article"], value="document", label="Document Type" ) upload_btn = gr.Button("📤 Upload", variant="primary", size="lg") upload_output = gr.Textbox(label="Upload Result", lines=5) upload_btn.click( upload_document, inputs=[file_input, doc_type_input], outputs=upload_output ) # Tab 3: Scrape URLs with gr.Tab("🌐 Scrape Web Pages"): gr.Markdown("### Scrape content from URLs") with gr.Row(): with gr.Column(): gr.Markdown("#### Single URL") url_input = gr.Textbox( label="Enter URL", placeholder="https://example.com/article" ) scrape_btn = gr.Button("🔍 Scrape", variant="primary") scrape_output = gr.Textbox(label="Result", lines=4) scrape_btn.click( scrape_single_url, inputs=url_input, outputs=scrape_output ) with gr.Column(): gr.Markdown("#### Batch URLs") urls_input = gr.Textbox( label="Enter multiple URLs (one per line)", placeholder="https://example.com/page1\nhttps://example.com/page2", lines=6 ) batch_btn = gr.Button("🔍 Batch Scrape", variant="primary") batch_output = gr.Textbox(label="Batch Results", lines=8) batch_btn.click( scrape_multiple_urls, inputs=urls_input, outputs=batch_output ) # Tab 4: Delete Documents with gr.Tab("đŸ—‘ī¸ Delete Documents"): gr.Markdown("### Delete documents or web pages") gr.Markdown("âš ī¸ **Warning**: This operation cannot be undone!") delete_input = gr.Textbox( label="Document Name or URL", placeholder="e.g., hiring_policy.pdf or https://example.com/article" ) delete_btn = gr.Button("đŸ—‘ī¸ Delete", variant="stop", size="lg") delete_output = gr.Textbox(label="Delete Result", lines=3) delete_btn.click( delete_document, inputs=delete_input, outputs=delete_output ) # Tab 5: Chatbot Settings with gr.Tab("âš™ī¸ Chatbot Settings"): gr.Markdown("### Chatbot Appearance & Text Settings") gr.Markdown("Changes take effect after the chatbot space restarts.") with gr.Row(): with gr.Column(): setting_primary_color = gr.ColorPicker(label="Primary Color (buttons, links)") setting_secondary_color = gr.ColorPicker(label="Secondary Color (background)") setting_font = gr.Textbox(label="Font Family", placeholder="Arial, sans-serif") setting_avatar = gr.Textbox(label="Bot Avatar Image URL", placeholder="https://...") with gr.Column(): setting_disclaimer = gr.Textbox(label="Disclaimer Text (Markdown)", lines=6) setting_welcome = gr.Textbox(label="Welcome Message (Markdown)", lines=8) save_settings_btn = gr.Button("💾 Save Settings", variant="primary", size="lg") settings_output = gr.Textbox(label="Result", lines=2) save_settings_btn.click( save_chatbot_settings, inputs=[setting_disclaimer, setting_welcome, setting_avatar, setting_primary_color, setting_secondary_color, setting_font], outputs=settings_output ) demo.load( get_current_settings, outputs=[setting_disclaimer, setting_welcome, setting_avatar, setting_primary_color, setting_secondary_color, setting_font] ) # Tab 6: Help with gr.Tab("â„šī¸ Help"): gr.Markdown(""" ### Usage Guide #### 📋 View Documents - Shows all uploaded documents and web pages - Displays document type, upload date, and number of chunks - Click "Refresh" to see the latest status #### âŦ†ī¸ Upload Documents - Supports PDF and DOCX formats - Documents are automatically split into chunks (~1000 characters each) - You can categorize documents by type #### 🌐 Scrape Web Pages - Enter full URLs (including https://) - Supports single or batch scraping - Content is automatically converted to Markdown format #### đŸ—‘ī¸ Delete Documents - Enter exact filename or URL - Deletes all chunks from that source - **Warning**: Cannot be undone! - **Tip**: To update a document, delete it first then upload the new version --- ### Advanced Management For detailed vector database management, visit: [Qdrant Cloud Dashboard](https://cloud.qdrant.io) ### Technical Support If you encounter issues, please contact the development team. """) if __name__ == "__main__": admin_user = os.getenv("ADMIN_USERNAME", "admin") admin_pass = os.getenv("ADMIN_PASSWORD", "hr-intervals-2026") demo.launch( server_name="0.0.0.0", server_port=7861, share=False, auth=(admin_user, admin_pass), )