# phy_dig_twin / data_generators.py
# Uploaded by: cryogenic22
# Commit message: Create data_generators.py
# Commit: 43ed365 (verified)
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta
def generate_physician_segments():
    """Build a DataFrame of simulated physician segments.

    One row is produced per hard-coded segment name. All numeric columns are
    drawn from the module-level ``random`` generator, so the output changes
    between calls unless the caller seeds ``random`` beforehand.

    Returns:
        pandas.DataFrame with the columns "Segment", "Size" (int, 1000-15000),
        "Prescribing Volume" (int, 50-200), "Digital Engagement"
        (float, 0.1-0.9), "XenoGlip Affinity" (float, 0.2-0.8) and
        "Message Receptivity" (float, 0.3-0.9).
    """
    segment_names = (
        "High Volume PCPs",
        "Early Adopter Endocrinologists",
        "Conservative PCPs",
        "Academic Endocrinologists",
        "Urban Health System PCPs",
        "Rural Independent PCPs",
        "Diabetes-Focused PCPs",
        "Cardiologists with Diabetes Interest",
        "Nurse Practitioners in Primary Care",
        "Physician Assistants in Endocrinology",
    )

    # Dict values are evaluated top to bottom, which fixes the order of the
    # random draws within each row.
    rows = [
        {
            "Segment": name,
            "Size": random.randint(1000, 15000),
            "Prescribing Volume": random.randint(50, 200),
            "Digital Engagement": random.uniform(0.1, 0.9),
            "XenoGlip Affinity": random.uniform(0.2, 0.8),
            "Message Receptivity": random.uniform(0.3, 0.9),
        }
        for name in segment_names
    ]
    return pd.DataFrame(rows)
def generate_prescription_data():
    """Generate simulated weekly prescription counts for the past year.

    The window spans the 365 days ending today (at midnight) and is sampled
    with pandas' weekly ('W') frequency. For every sampled week each product
    receives its baseline volume plus uniform integer noise. XenoGlip is
    additionally scaled by a linear growth factor that reaches roughly +15%
    at the end of the window.

    Values are drawn from the module-level ``random`` generator; seed it
    first if reproducible numbers are needed.

    Returns:
        pandas.DataFrame in long format with the columns "Date", "Product"
        and "Prescriptions" (one row per sampled week and product).
    """
    # Anchor the window at midnight. A bare datetime.now() would stamp every
    # "Date" value with the call's time-of-day (down to microseconds), so two
    # runs on the same day would never share date keys.
    end_date = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
    start_date = end_date - timedelta(days=365)
    dates = pd.date_range(start=start_date, end=end_date, freq='W')

    # Product -> (baseline weekly volume, maximum +/- random noise)
    product_profiles = {
        "XenoGlip": (8000, 500),
        "CompDPP4": (12000, 800),
        "GLP1-A": (9000, 600),
        "GLP1-B": (7500, 500),
        "SGLT2-A": (11000, 700),
        "SGLT2-B": (6500, 400),
    }

    data = []
    for date in dates:
        weekly = {
            product: base + random.randint(-noise, noise)
            for product, (base, noise) in product_profiles.items()
        }

        # Linear trend: 15% annual growth applied to XenoGlip only
        week_num = (date - start_date).days / 7
        growth_factor = 1 + (week_num / 52) * 0.15
        weekly["XenoGlip"] = int(weekly["XenoGlip"] * growth_factor)

        data.extend(
            {"Date": date, "Product": product, "Prescriptions": count}
            for product, count in weekly.items()
        )
    return pd.DataFrame(data)
def generate_key_drivers():
    """Build a DataFrame scoring each prescription driver per physician segment.

    Every (driver, segment) pair gets one row, ordered driver-major, with an
    importance score drawn uniformly from 0.5 to 0.95 using the module-level
    ``random`` generator.

    Returns:
        pandas.DataFrame with the columns "Driver", "Segment" and
        "Importance" (30 rows: 10 drivers x 3 segments).
    """
    driver_names = (
        "Efficacy in A1C reduction",
        "Safety profile",
        "Tolerability",
        "Once-daily dosing",
        "Formulary status",
        "Patient cost",
        "Cardiovascular benefits",
        "Weight neutrality",
        "Renal considerations",
        "Low hypoglycemia risk",
    )
    segment_names = ("PCP", "Endocrinologist", "Cardiologist")

    # Outer loop over drivers, inner loop over segments (driver-major order)
    rows = [
        {
            "Driver": driver_name,
            "Segment": segment_name,
            "Importance": random.uniform(0.5, 0.95),
        }
        for driver_name in driver_names
        for segment_name in segment_names
    ]
    return pd.DataFrame(rows)
def generate_regional_data():
    """Build a DataFrame of simulated per-region prescription metrics.

    One row is produced for each of the five hard-coded regions. All metrics
    are drawn from the module-level ``random`` generator.

    Returns:
        pandas.DataFrame with the columns "Region", "Market Share"
        (float, 0.05-0.25), "Growth Rate" (float, -0.05-0.15),
        "Prescription Volume" (int, 5000-20000) and "Physician Adoption"
        (float, 0.2-0.6).
    """
    region_names = ("Northeast", "Southeast", "Midwest", "Southwest", "West")

    def _simulate(region_name):
        # Values are evaluated in key order, which fixes the draw sequence
        return {
            "Region": region_name,
            "Market Share": random.uniform(0.05, 0.25),
            "Growth Rate": random.uniform(-0.05, 0.15),
            "Prescription Volume": random.randint(5000, 20000),
            "Physician Adoption": random.uniform(0.2, 0.6),
        }

    return pd.DataFrame([_simulate(region_name) for region_name in region_names])
def generate_formulary_scenario_data():
    """Build a DataFrame of metric values under each formulary scenario.

    The current scenario reports the baseline value of every metric. Each
    alternative scenario reports the baseline plus that scenario's fixed
    improvement. The output is deterministic (no random draws).

    Returns:
        pandas.DataFrame in long format with the columns "Scenario", "Metric"
        and "Value" (20 rows: 5 scenarios x 4 metrics, scenario-major).
    """
    current_label = "Current (Tier 3, PA required)"
    metric_names = ("New Rx Growth", "Overall Share", "Switch from Competitors", "Adherence")

    baseline_by_metric = {
        "New Rx Growth": 0.0,
        "Overall Share": 0.11,
        "Switch from Competitors": 0.0,
        "Adherence": 0.68,
    }

    # Per-scenario improvements, expressed relative to the baseline
    uplift_by_scenario = {
        "Tier 2, PA required": {
            "New Rx Growth": 0.15,
            "Overall Share": 0.02,
            "Switch from Competitors": 0.08,
            "Adherence": 0.03,
        },
        "Tier 3, No PA": {
            "New Rx Growth": 0.22,
            "Overall Share": 0.015,
            "Switch from Competitors": 0.12,
            "Adherence": 0.05,
        },
        "Tier 2, No PA": {
            "New Rx Growth": 0.35,
            "Overall Share": 0.04,
            "Switch from Competitors": 0.25,
            "Adherence": 0.08,
        },
        "Tier 1, No PA": {
            "New Rx Growth": 0.65,
            "Overall Share": 0.07,
            "Switch from Competitors": 0.38,
            "Adherence": 0.12,
        },
    }

    scenario_order = (
        current_label,
        "Tier 2, PA required",
        "Tier 3, No PA",
        "Tier 2, No PA",
        "Tier 1, No PA",
    )

    rows = []
    for scenario_name in scenario_order:
        for metric_name in metric_names:
            metric_value = baseline_by_metric[metric_name]
            if scenario_name != current_label:
                metric_value = metric_value + uplift_by_scenario[scenario_name][metric_name]
            rows.append({
                "Scenario": scenario_name,
                "Metric": metric_name,
                "Value": metric_value,
            })
    return pd.DataFrame(rows)
def generate_message_testing_data():
    """Build a DataFrame of simulated message-testing results.

    Every (message, segment) pair gets one row, ordered message-major, with
    two scores drawn from the module-level ``random`` generator.

    Returns:
        pandas.DataFrame with the columns "Message", "Segment", "Receptivity"
        (float, 0.3-0.9) and "Impact Score" (float, 2.5-9.5); 28 rows
        (7 messages x 4 segments).
    """
    message_texts = (
        "Once-daily dosing for simplicity",
        "Proven efficacy in A1C reduction",
        "Established cardiovascular safety",
        "Minimal hypoglycemia risk",
        "Suitable for renal impairment patients",
        "Weight neutral option",
        "Extensive clinical experience",
    )
    segment_names = (
        "High Volume PCPs",
        "Early Adopter Endocrinologists",
        "Conservative PCPs",
        "Academic Endocrinologists",
    )

    rows = []
    for message_text in message_texts:
        # Receptivity is drawn before the impact score for each segment
        rows.extend(
            {
                "Message": message_text,
                "Segment": segment_name,
                "Receptivity": random.uniform(0.3, 0.9),
                "Impact Score": random.uniform(2.5, 9.5),
            }
            for segment_name in segment_names
        )
    return pd.DataFrame(rows)
def generate_patient_profile_data():
    """Generate 50 simulated patient profiles.

    Each profile is assembled from independent random choices out of fixed
    category lists, using the module-level ``random`` generator. The two
    comorbidity fields are kept mutually consistent:

    * a primary comorbidity of "None" forces the secondary to "None";
    * otherwise the secondary is drawn from the remaining options (which
      include "None"), so a profile never lists the same condition twice.

    Returns:
        pandas.DataFrame with one row per profile and the columns "ID"
        (1-50), "Age Group", "Gender", "BMI Category", "A1C Range",
        "Primary Comorbidity", "Secondary Comorbidity", "Current Medication",
        "Insurance" and "Years with T2DM" (int, 1-20).
    """
    age_groups = ["30-45", "46-60", "61-75", "76+"]
    comorbidities = ["Hypertension", "Obesity", "Dyslipidemia", "CKD", "CVD", "None"]
    a1c_ranges = ["<7.0", "7.0-7.9", "8.0-8.9", "9.0+"]
    current_meds = ["Metformin only", "Met+SU", "Met+DPP4", "Met+SGLT2", "Met+GLP1", "Complex regimen"]

    profiles = []
    for i in range(50):
        primary = random.choice(comorbidities)
        if primary == "None":
            # No primary condition means there cannot be a secondary one
            secondary = "None"
        else:
            # Exclude the primary so the same condition is not listed twice
            secondary = random.choice([c for c in comorbidities if c != primary])

        profiles.append({
            "ID": i + 1,
            "Age Group": random.choice(age_groups),
            "Gender": random.choice(["Male", "Female"]),
            "BMI Category": random.choice(["Normal", "Overweight", "Obese", "Severely Obese"]),
            "A1C Range": random.choice(a1c_ranges),
            "Primary Comorbidity": primary,
            "Secondary Comorbidity": secondary,
            "Current Medication": random.choice(current_meds),
            "Insurance": random.choice(["Commercial", "Medicare", "Medicaid", "Uninsured"]),
            "Years with T2DM": random.randint(1, 20),
        })
    return pd.DataFrame(profiles)
def generate_competitive_analysis_data():
    """Build a DataFrame of fixed attribute scores for each product.

    The scores are hard-coded (no random draws). The output is ordered
    product-major, with attributes in the order listed below.

    Returns:
        pandas.DataFrame in long format with the columns "Product",
        "Attribute" and "Value" (54 rows: 6 products x 9 attributes).
    """
    attribute_names = (
        "A1C Reduction",
        "Weight Effect",
        "Hypoglycemia Risk",
        "Cardiovascular Benefit",
        "Renal Benefit",
        "GI Side Effects",
        "Injection Required",
        "Cost to Patient",
        "Formulary Status",
    )

    # One entry per product; scores are positional and line up with
    # attribute_names above.
    score_table = (
        ("XenoGlip (DPP-4)", (0.7, 0.0, 0.05, 0.0, 0.1, 0.1, 0.0, 0.5, 0.6)),
        ("CompDPP4", (0.65, 0.0, 0.05, 0.0, 0.1, 0.1, 0.0, 0.5, 0.7)),
        ("GLP1-A", (1.2, -0.8, 0.1, 0.8, 0.5, 0.7, 1.0, 0.85, 0.5)),
        ("GLP1-B", (1.4, -0.9, 0.1, 0.8, 0.6, 0.8, 1.0, 0.9, 0.4)),
        ("SGLT2-A", (0.8, -0.5, 0.05, 0.7, 0.8, 0.2, 0.0, 0.7, 0.6)),
        ("SGLT2-B", (0.7, -0.4, 0.05, 0.6, 0.7, 0.2, 0.0, 0.6, 0.5)),
    )

    rows = [
        {"Product": product_name, "Attribute": attribute_name, "Value": score}
        for product_name, scores in score_table
        for attribute_name, score in zip(attribute_names, scores)
    ]
    return pd.DataFrame(rows)