|
|
"""
|
|
|
Simple script that loads the healthcare reason-for-visit data and reports how the reasons are categorized
|
|
|
"""
|
|
|
|
|
|
import pandas as pd
|
|
|
import sys
|
|
|
import os
|
|
|
|
|
|
|
|
|
# Append '.' (resolved against the current working directory at import time)
# to the module search path.
# NOTE(review): presumably so project-local modules can be imported when the
# script is run from the project root; nothing visible here imports one — confirm.
sys.path.append('.')
|
|
|
|
|
|
# Ordered keyword rules: the first category whose keyword occurs in the
# lower-cased reason wins, so earlier entries take precedence (for example
# "calluses" is claimed by ROUTINE_CARE before SKIN_CONDITIONS sees "callus").
_CATEGORY_KEYWORDS = (
    ("ROUTINE_CARE", ("routine", "nail care", "calluses")),
    ("PAIN_CONDITIONS", ("pain", "ache", "sore")),
    ("INJURIES", ("sprain", "wound", "injury")),
    ("SKIN_CONDITIONS", ("ingrown", "toenail", "callus")),
    ("STRUCTURAL_ISSUES", ("flat feet", "plantar", "fasciitis", "achilles")),
    ("PROCEDURES", ("injection", "surgical", "consult", "postop")),
)

# Category returned when no keyword matches.
# NOTE(review): the original if/elif chain also fell back to PAIN_CONDITIONS;
# confirm that this (rather than an "other" bucket) is intended.
_FALLBACK_CATEGORY = "PAIN_CONDITIONS"

_REASON_COLUMN = "Reason For Visit"
_DEFAULT_DATA_PATH = "data/reason_for_visit_data.xlsx"


def map_reason_to_category(reason: str) -> str:
    """Map a free-text visit reason to a category name by keyword matching.

    Matching is a case-insensitive substring test against
    ``_CATEGORY_KEYWORDS``, evaluated in order; the first hit wins.

    Args:
        reason: The visit reason text. Non-string values (such as the NaN
            pandas produces for a blank cell) are accepted and get the
            fallback category instead of raising ``AttributeError``.

    Returns:
        The matching category name, or ``_FALLBACK_CATEGORY`` if nothing
        matches.
    """
    if not isinstance(reason, str):
        return _FALLBACK_CATEGORY

    reason_lower = reason.lower()
    for category, keywords in _CATEGORY_KEYWORDS:
        if any(keyword in reason_lower for keyword in keywords):
            return category
    return _FALLBACK_CATEGORY


def test_data_loading(data_path: str = _DEFAULT_DATA_PATH) -> bool:
    """Load the reason-for-visit spreadsheet and print a categorization report.

    Prints the dataset shape and columns, the ten most frequent visit
    reasons, the distribution of derived categories, and up to three example
    reasons per category. A ``Category`` column is added to the loaded frame.

    Args:
        data_path: Path of the Excel file to read. Defaults to the location
            the script has always used.

    Returns:
        ``True`` when the report was produced; ``False`` when the file could
        not be loaded or lacks the ``Reason For Visit`` column.
    """
    print("Testing Healthcare Reason Data Processing")
    print("=" * 40)

    # Broad catch is deliberate: any load problem (missing file, missing
    # Excel engine, corrupt workbook) is reported and turned into False.
    try:
        df = pd.read_excel(data_path)
    except Exception as e:
        print(f"❌ Error loading data: {e}")
        return False
    print(f"✅ Successfully loaded {len(df)} records")

    print("\nDataset Info:")
    print(f"Shape: {df.shape}")
    print(f"Columns: {list(df.columns)}")

    # Fail with a clear message instead of a KeyError traceback.
    if _REASON_COLUMN not in df.columns:
        print(f"❌ Error: required column {_REASON_COLUMN!r} not found")
        return False

    print("\nTop 10 Reasons for Visit:")
    top_reasons = df[_REASON_COLUMN].value_counts().head(10)
    for reason, count in top_reasons.items():
        print(f" {reason}: {count}")

    df['Category'] = df[_REASON_COLUMN].apply(map_reason_to_category)

    print("\nCategory Distribution:")
    category_counts = df['Category'].value_counts()
    total = len(df)  # an empty frame yields no categories, so no division by zero
    for category, count in category_counts.items():
        percentage = (count / total) * 100
        print(f" {category}: {count} ({percentage:.1f}%)")

    print("\nExample reasons by category:")
    for category in category_counts.index:
        examples = (
            df.loc[df['Category'] == category, _REASON_COLUMN].head(3).tolist()
        )
        print(f" {category}:")
        for example in examples:
            print(f" - {example}")

    return True


# The name starts with "test_", so pytest would collect this function, but it
# reports its outcome through a bool return value rather than assertions.
# Opt it out of collection; it is meant to be run as a script.
test_data_loading.__test__ = False
|
|
|
|
|
|
if __name__ == "__main__":
    # Run the analysis once and report the overall outcome.
    success = test_data_loading()

    if success:
        print("\n✅ Healthcare reason data analysis completed successfully!")
    else:
        print("\n❌ Healthcare reason data analysis failed!")

    # Reflect the outcome in the process exit status so shells and CI jobs
    # can detect a failed run (0 on success, 1 on failure).
    raise SystemExit(0 if success else 1)