Spaces:

taraky
/

Medical_Document_Retrieval

Sleeping

App Files Files Community

Medical_Document_Retrieval / reason_data_analysis.py

taraky

Upload folder using huggingface_hub

b7f3196 verified 4 days ago

raw

history blame

2.93 kB

	"""
	Simple script to analyze healthcare reason data processing
	"""

	import pandas as pd
	import sys
	import os

	# Add current directory to path
	sys.path.append('.')

	# Name of the spreadsheet column that holds the free-text visit reason.
	_REASON_COLUMN = 'Reason For Visit'

	# Category returned when no keyword rule matches.
	_DEFAULT_CATEGORY = "PAIN_CONDITIONS"

	# Ordered (category, keywords) rules. The first rule with a keyword that
	# occurs as a substring of the lower-cased reason wins, so the order of
	# this table is part of the behavior.
	_CATEGORY_RULES = (
	    ("ROUTINE_CARE", ('routine', 'nail care', 'calluses')),
	    ("PAIN_CONDITIONS", ('pain', 'ache', 'sore')),
	    ("INJURIES", ('sprain', 'wound', 'injury')),
	    ("SKIN_CONDITIONS", ('ingrown', 'toenail', 'callus')),
	    ("STRUCTURAL_ISSUES", ('flat feet', 'plantar', 'fasciitis', 'achilles')),
	    ("PROCEDURES", ('injection', 'surgical', 'consult', 'postop')),
	)


	def map_reason_to_category(reason: object) -> str:
	    """Map a reason-for-visit value to a category name by keyword match.

	    Args:
	        reason: The visit reason. Non-string values (numbers, NaN, None)
	            are converted with ``str()`` instead of raising.

	    Returns:
	        The category of the first rule in ``_CATEGORY_RULES`` that has a
	        keyword contained in the lower-cased reason, or
	        ``_DEFAULT_CATEGORY`` when no rule matches.
	    """
	    # str() keeps the function total: spreadsheet cells may hold floats
	    # (including NaN for blanks) rather than text.
	    reason_lower = str(reason).lower()
	    for category, keywords in _CATEGORY_RULES:
	        if any(word in reason_lower for word in keywords):
	            return category
	    return _DEFAULT_CATEGORY


	def test_data_loading(data_path: str = 'data/reason_for_visit_data.xlsx') -> bool:
	    """Load the reason-for-visit workbook and print a categorization report.

	    Prints the dataset shape and columns, the ten most frequent reasons,
	    the distribution of keyword-derived categories, and up to three
	    example reasons per category. A ``Category`` column is added to the
	    loaded DataFrame as part of the analysis.

	    Args:
	        data_path: Path of the Excel file to analyze. Defaults to the
	            location the script has always used.

	    Returns:
	        True when the report was produced; False when the file could not
	        be loaded or lacks the 'Reason For Visit' column.
	    """
	    print("Testing Healthcare Reason Data Processing")
	    print("=" * 40)

	    # Load the data. The broad catch is deliberate: this is the script's
	    # reporting boundary and any load failure is reported and turned into
	    # a False result rather than a traceback.
	    try:
	        df = pd.read_excel(data_path)
	    except Exception as e:
	        print(f"❌ Error loading data: {e}")
	        return False
	    print(f"✅ Successfully loaded {len(df)} records")

	    # Analyze the data
	    print("\nDataset Info:")
	    print(f"Shape: {df.shape}")
	    print(f"Columns: {list(df.columns)}")

	    # Without the reason column nothing below can run; fail the same way
	    # a load error does instead of raising KeyError.
	    if _REASON_COLUMN not in df.columns:
	        print(f"❌ Error loading data: missing column '{_REASON_COLUMN}'")
	        return False

	    reasons = df[_REASON_COLUMN]

	    # Show reason distribution
	    print("\nTop 10 Reasons for Visit:")
	    for reason, count in reasons.value_counts().head(10).items():
	        print(f" {reason}: {count}")

	    # Apply categorization. na_action='ignore' leaves blank cells as
	    # missing instead of passing NaN to the mapper, so they are excluded
	    # from the category counts rather than being silently mislabelled.
	    df['Category'] = reasons.map(map_reason_to_category, na_action='ignore')

	    missing = int(reasons.isna().sum())
	    if missing:
	        print(f"\n⚠️ Skipped {missing} records with no reason recorded")

	    print("\nCategory Distribution:")
	    category_counts = df['Category'].value_counts()
	    total = len(df)
	    for category, count in category_counts.items():
	        # total > 0 whenever this loop body runs: an empty frame yields
	        # no categories.
	        percentage = (count / total) * 100
	        print(f" {category}: {count} ({percentage:.1f}%)")

	    # Show examples for each category
	    print("\nExample reasons by category:")
	    for category in category_counts.index:
	        examples = df.loc[df['Category'] == category, _REASON_COLUMN].head(3).tolist()
	        print(f" {category}:")
	        for example in examples:
	            print(f" - {example}")

	    return True

	if __name__ == "__main__":
	    # Run the analysis once and report the overall outcome on its own line.
	    success = test_data_loading()
	    print(
	        "\n✅ Healthcare reason data analysis completed successfully!"
	        if success
	        else "\n❌ Healthcare reason data analysis failed!"
	    )