# File size: 2,934 Bytes
# b7f3196
"""
Simple script to analyze healthcare reason data processing
"""
import pandas as pd
import sys
import os
# Add the current working directory to the module search path. Note that '.'
# is resolved relative to wherever the script is launched from, not relative
# to this file's location.
sys.path.append('.')
# Spreadsheet column that holds the free-text reason for each visit.
_REASON_COLUMN = 'Reason For Visit'

# Category used when no keyword matches or when the cell holds no text.
_DEFAULT_CATEGORY = "PAIN_CONDITIONS"

# Ordered (category, keywords) rules. Order matters: the first rule with a
# keyword found in the lower-cased reason wins, so e.g. a reason mentioning
# both "pain" and "ingrown" is classified as PAIN_CONDITIONS.
_CATEGORY_RULES = (
    ("ROUTINE_CARE", ('routine', 'nail care', 'calluses')),
    ("PAIN_CONDITIONS", ('pain', 'ache', 'sore')),
    ("INJURIES", ('sprain', 'wound', 'injury')),
    ("SKIN_CONDITIONS", ('ingrown', 'toenail', 'callus')),
    ("STRUCTURAL_ISSUES", ('flat feet', 'plantar', 'fasciitis', 'achilles')),
    ("PROCEDURES", ('injection', 'surgical', 'consult', 'postop')),
)


def map_reason_to_category(reason: object) -> str:
    """Map a free-text visit reason to a coarse category label.

    Matching is a case-insensitive substring search against the keyword
    lists in ``_CATEGORY_RULES``; the first matching rule wins.

    Args:
        reason: The visit reason. Normally a string, but blank spreadsheet
            cells are read by pandas as NaN (a float), so any non-string
            value is accepted and sent to the default category.

    Returns:
        One of the category names in ``_CATEGORY_RULES``, or
        ``_DEFAULT_CATEGORY`` when nothing matches or ``reason`` is not text.
    """
    # Calling .lower() on NaN/None would raise AttributeError and abort the
    # whole analysis on the first blank cell.
    if not isinstance(reason, str):
        return _DEFAULT_CATEGORY

    reason_lower = reason.lower()
    for category, keywords in _CATEGORY_RULES:
        if any(keyword in reason_lower for keyword in keywords):
            return category
    return _DEFAULT_CATEGORY


def test_data_loading(data_path: str = 'data/reason_for_visit_data.xlsx') -> bool:
    """Load the visit-reason spreadsheet, categorise it and print a summary.

    The function prints the dataset shape and columns, the ten most frequent
    reasons, the distribution of derived categories, and up to three example
    reasons per category. It also adds a ``Category`` column to the loaded
    DataFrame (which is local to this call).

    Args:
        data_path: Path of the Excel file to analyse. Defaults to the
            location the script originally hard-coded.

    Returns:
        True when the analysis ran to completion; False when the file could
        not be loaded or does not contain the 'Reason For Visit' column.
    """
    print("Testing Healthcare Reason Data Processing")
    print("=" * 40)

    # Load the data. The broad catch is deliberate: read_excel can fail for
    # many reasons (missing file, missing Excel engine, corrupt workbook) and
    # this function reports every one of them the same way.
    try:
        df = pd.read_excel(data_path)
    except Exception as e:
        print(f"❌ Error loading data: {e}")
        return False
    print(f"✅ Successfully loaded {len(df)} records")

    # Analyze the data
    print("\nDataset Info:")
    print(f"Shape: {df.shape}")
    print(f"Columns: {list(df.columns)}")

    # Without the reason column nothing below can work; report it as a
    # failure instead of letting a KeyError escape.
    if _REASON_COLUMN not in df.columns:
        print(f"❌ Error loading data: missing column {_REASON_COLUMN!r}")
        return False

    # Show reason distribution
    print("\nTop 10 Reasons for Visit:")
    top_reasons = df[_REASON_COLUMN].value_counts().head(10)
    for reason, count in top_reasons.items():
        print(f" {reason}: {count}")

    # Apply categorization
    df['Category'] = df[_REASON_COLUMN].apply(map_reason_to_category)
    print("\nCategory Distribution:")
    category_counts = df['Category'].value_counts()
    total = len(df)
    # An empty sheet yields no categories, so the loop body (and its
    # division by ``total``) never runs in that case.
    for category, count in category_counts.items():
        percentage = (count / total) * 100
        print(f" {category}: {count} ({percentage:.1f}%)")

    # Show examples for each category
    print("\nExample reasons by category:")
    for category in category_counts.index:
        examples = df.loc[df['Category'] == category, _REASON_COLUMN].head(3).tolist()
        print(f" {category}:")
        for example in examples:
            print(f" - {example}")
    return True
if __name__ == "__main__":
    # Run the analysis, report the outcome, and propagate it to the shell so
    # that callers (CI, shell scripts) can detect a failed run: exit status 0
    # on success, 1 on failure.
    success = test_data_loading()
    if success:
        print("\n✅ Healthcare reason data analysis completed successfully!")
    else:
        print("\n❌ Healthcare reason data analysis failed!")
    sys.exit(0 if success else 1)