open-navigator / databricks /notebooks /01_agent_bricks_quickstart.py
jcbowyer's picture
Clean HuggingFace deployment without binary files
61d29fc
# Databricks notebook source
# MAGIC %md
# MAGIC # Oral Health Policy Finder - Agent Bricks Quickstart
# MAGIC
# MAGIC This notebook demonstrates the Databricks Agent Bricks implementation of the Oral Health Policy Finder system.
# MAGIC
# MAGIC **Features:**
# MAGIC - MLflow-based agents with automatic tracing
# MAGIC - Unity Catalog governance
# MAGIC - Model Serving deployment
# MAGIC - Agent evaluation framework
# MAGIC - Delta Lake integration
# MAGIC
# MAGIC **Prerequisites:**
# MAGIC - Databricks Runtime 14.3 LTS ML or higher
# MAGIC - Unity Catalog enabled
# MAGIC - Model Serving permissions
# COMMAND ----------
# MAGIC %md
# MAGIC ## 1. Setup and Configuration
# COMMAND ----------
# Install dependencies
%pip install -q mlflow>=2.10.0 databricks-agents>=0.1.0 langchain>=0.1.0 openai>=1.6.0
# COMMAND ----------
import mlflow

# Unity Catalog namespace; later cells build model URIs and table names from it
CATALOG, SCHEMA = "main", "agents"

# Select the "databricks-uc" model registry for MLflow
mlflow.set_registry_uri("databricks-uc")

# Create the catalog first, then the schema inside it.
# IF NOT EXISTS makes both statements safe to re-run.
for ddl in (
    f"CREATE CATALOG IF NOT EXISTS {CATALOG}",
    f"CREATE SCHEMA IF NOT EXISTS {CATALOG}.{SCHEMA}",
):
    spark.sql(ddl)

print(f"✅ Using Unity Catalog: {CATALOG}.{SCHEMA}")
# COMMAND ----------
# MAGIC %md
# MAGIC ## 2. Test Policy Classifier Agent Locally
# COMMAND ----------
# Put the project checkout on the import path, then load the classifier agent.
# The repo path is a placeholder ("your-repo") and must be adapted per workspace.
import sys

sys.path.append("/Workspace/Repos/your-repo/open-navigator")

from agents.mlflow_classifier import PolicyClassifierAgent

# Build the agent under test
agent = PolicyClassifierAgent()

# Sample meeting document used as a local smoke test
test_input = dict(
    document_id="test_001",
    title="City Council Meeting - Water Infrastructure",
    content="""
The city council voted 5-2 to approve fluoridation of the municipal water supply.
The program will begin next quarter with monitoring by the health department.
Expected to benefit approximately 50,000 residents.
""",
)

# Call predict() directly; None is passed as its first positional argument
result = agent.predict(None, test_input)

# Report the fields of the returned mapping
print("Classification Result:")
print(f" Topic: {result['primary_topic']}")
print(f" Confidence: {result['confidence']:.2%}")
print(f" Method: {result['method']}")
print(f" Reasoning: {result['reasoning']}")
# COMMAND ----------
# MAGIC %md
# MAGIC ## 3. Register Agent to Unity Catalog
# COMMAND ----------
# NOTE(review): databricks.deployment is presumably this repository's own
# databricks/ package (made importable via sys.path) - confirm it does not
# clash with an installed "databricks" distribution.
from databricks.deployment import AgentDeploymentManager

# Tags passed along with the registration
registration_tags = {
    "team": "advocacy",
    "domain": "oral_health",
    "framework": "databricks-agent-bricks",
}

# The manager is reused by the deployment and monitoring cells below
manager = AgentDeploymentManager()

# Register the classifier class; the returned version is reused when deploying and evaluating
version = manager.register_agent(
    agent_class=PolicyClassifierAgent,
    agent_name="policy_classifier",
    description="Classifies government meeting documents for oral health policy topics",
    tags=registration_tags,
)

print(f"✅ Registered policy_classifier version {version}")
print(f" Model: {CATALOG}.{SCHEMA}.policy_classifier")
# COMMAND ----------
# MAGIC %md
# MAGIC ## 4. Deploy to Model Serving
# COMMAND ----------
# Serving endpoint settings gathered in one mapping, then forwarded as keyword arguments
serving_config = {
    "agent_name": "policy_classifier",
    "endpoint_name": "policy-classifier-dev",
    "version": version,
    "workload_size": "Small",
    "scale_to_zero": True,
}

# Deploy the registered version; the call returns the endpoint URL
endpoint_url = manager.deploy_agent(**serving_config)

print("✅ Deployed to Model Serving")
print(f" Endpoint: {serving_config['endpoint_name']}")
print(f" URL: {endpoint_url}")
# COMMAND ----------
# MAGIC %md
# MAGIC ## 5. Test Deployed Endpoint
# COMMAND ----------
import requests
import os

# Invoke the deployed serving endpoint over REST using the notebook's own
# workspace URL and API token.
endpoint_name = "policy-classifier-dev"

# Read host and token from the notebook context (looked up once, used twice)
notebook_context = dbutils.notebook.entry_point.getDbutils().notebook().getContext()
databricks_host = notebook_context.apiUrl().get()
databricks_token = notebook_context.apiToken().get()

url = f"{databricks_host}/serving-endpoints/{endpoint_name}/invocations"
headers = {
    "Authorization": f"Bearer {databricks_token}",
    "Content-Type": "application/json",
}

# One record in the "dataframe_records" request format
test_payload = {
    "dataframe_records": [
        {
            "document_id": "endpoint_test_001",
            "title": "School Board Meeting",
            "content": "Discussion of new dental screening program for elementary students",
        }
    ]
}

# requests has no default timeout, so without one a stalled endpoint would
# block this cell forever. NOTE(review): 300s is a generous guess meant to
# tolerate a scale-to-zero endpoint waking up - tune as needed.
response = requests.post(url, headers=headers, json=test_payload, timeout=300)

# Fail loudly on HTTP errors instead of printing an error payload as if it
# were a prediction (or crashing inside response.json() on a non-JSON body).
if not response.ok:
    print(f"Endpoint returned HTTP {response.status_code}: {response.text}")
    response.raise_for_status()

result = response.json()
print("Endpoint Response:")
print(result)
# COMMAND ----------
# MAGIC %md
# MAGIC ## 6. Evaluate Agent Performance
# COMMAND ----------
from databricks.evaluation import AgentEvaluator
import pandas as pd

# Labelled evaluation examples, one tuple per document in column order
eval_columns = ["document_id", "title", "content", "ground_truth"]
labelled_examples = [
    (
        "eval_001",
        "Water Board Meeting",
        "Approved fluoride addition to water supply",
        "water_fluoridation",
    ),
    (
        "eval_002",
        "School Board Session",
        "New dental screening program for students",
        "school_dental_screening",
    ),
    (
        "eval_003",
        "Budget Review",
        "General fund allocation discussion",
        "not_oral_health_related",
    ),
]
eval_data = pd.DataFrame(labelled_examples, columns=eval_columns)

# Evaluator for the registered classifier
evaluator = AgentEvaluator("policy_classifier")

# Split the frame into model inputs (records without the label) and expected labels
registered_model_uri = f"models:/{CATALOG}.{SCHEMA}.policy_classifier/{version}"
input_documents = eval_data[["document_id", "title", "content"]].to_dict("records")
expected_labels = eval_data["ground_truth"].tolist()

metrics = evaluator.evaluate_classifier(
    model_uri=registered_model_uri,
    test_documents=input_documents,
    ground_truth=expected_labels,
)

# Report the metric attributes exposed by the evaluation result
print("\n📊 Evaluation Metrics:")
print(f" Accuracy: {metrics.accuracy:.2%}")
print(f" Precision: {metrics.precision:.2%}")
print(f" Recall: {metrics.recall:.2%}")
print(f" F1 Score: {metrics.f1_score:.2%}")
print(f" Avg Latency: {metrics.avg_latency_ms:.0f}ms")
# COMMAND ----------
# MAGIC %md
# MAGIC ## 7. Query Results from Delta Lake
# COMMAND ----------
# Make sure the Delta table for classification results exists.
# This only creates the (empty) table; no rows are inserted here.
create_table_sql = f"""
CREATE TABLE IF NOT EXISTS {CATALOG}.{SCHEMA}.classified_documents (
document_id STRING,
municipality STRING,
state STRING,
meeting_date TIMESTAMP,
primary_topic STRING,
confidence DOUBLE,
relevant_excerpts ARRAY<STRING>,
classification_timestamp TIMESTAMP
)
USING DELTA
PARTITIONED BY (state)
"""
spark.sql(create_table_sql)

# Per state and topic: document count and mean confidence, excluding
# documents classified as not oral-health related; busiest groups first.
topic_summary_sql = f"""
SELECT
state,
primary_topic,
COUNT(*) as document_count,
AVG(confidence) as avg_confidence
FROM {CATALOG}.{SCHEMA}.classified_documents
WHERE primary_topic != 'not_oral_health_related'
GROUP BY state, primary_topic
ORDER BY document_count DESC
"""
df = spark.sql(topic_summary_sql)

display(df)
# COMMAND ----------
# MAGIC %md
# MAGIC ## 8. Query Advocacy Opportunities (Heatmap Data)
# COMMAND ----------
# Top 100 documents on the three target topics with confidence above 0.7,
# highest confidence first
opportunities_sql = f"""
SELECT
state,
municipality,
primary_topic,
confidence,
relevant_excerpts
FROM {CATALOG}.{SCHEMA}.classified_documents
WHERE
primary_topic IN ('water_fluoridation', 'school_dental_screening', 'low_income_dental_funding')
AND confidence > 0.7
ORDER BY confidence DESC
LIMIT 100
"""
opportunities = spark.sql(opportunities_sql)

# Pull the (at most 100-row) result into pandas for downstream visualization
pdf = opportunities.toPandas()

print(f"Found {pdf.shape[0]} advocacy opportunities")
display(pdf.head(10))
# COMMAND ----------
# MAGIC %md
# MAGIC ## 9. Monitor Agent Performance
# COMMAND ----------
# Fetch the current status of the dev serving endpoint
status = manager.get_endpoint_status("policy-classifier-dev")

# Endpoint-level summary
print("Endpoint Status:")
print(f" Name: {status['name']}")
print(f" State: {status['state']}")

# One line per served entity: name, version and state
print("\nServed Entities:")
for entity in status['served_entities']:
    entity_name = entity['name']
    entity_version = entity['version']
    entity_state = entity['state']
    print(f" - {entity_name} v{entity_version}: {entity_state}")
# COMMAND ----------
# MAGIC %md
# MAGIC ## 10. A/B Test Model Versions
# COMMAND ----------
# Compare two versions (if you have multiple)
# comparison = evaluator.compare_versions(
# version_a="1",
# version_b="2",
# eval_data=eval_data
# )
#
# print("Version Comparison:")
# for metric, data in comparison["improvements"].items():
# print(f" {metric}: {data['improvement_pct']:.1f}% improvement")
# COMMAND ----------
# MAGIC %md
# MAGIC ## Next Steps
# MAGIC
# MAGIC 1. **Scale Up**: Process thousands of documents using Spark
# MAGIC 2. **Add Monitoring**: Set up alerts for model drift
# MAGIC 3. **Feedback Loop**: Collect user corrections in Delta Lake
# MAGIC 4. **Multi-Agent**: Deploy sentiment and advocacy writer agents
# MAGIC 5. **Production**: Promote to production endpoint with traffic splitting
# MAGIC
# MAGIC **Resources:**
# MAGIC - [Databricks Agent Framework Docs](https://docs.databricks.com/en/generative-ai/agent-framework/index.html)
# MAGIC - [MLflow Guide](https://mlflow.org/docs/latest/index.html)
# MAGIC - [Unity Catalog](https://docs.databricks.com/en/data-governance/unity-catalog/index.html)