Spaces:

VEDAGI1
/

Medica_DecisionSupportAI

Sleeping

Medica_DecisionSupportAI / upload_ingest.py

Rajan Sharma

Update upload_ingest.py

44836be verified 3 months ago

2.83 kB

	# upload_ingest.py
	import pandas as pd
	import os
	from typing import Dict, List, Any

	def _summarize_healthcare_columns(df: pd.DataFrame) -> Dict[str, Any]:
	    """Build a summary of the healthcare-specific columns found in *df*.

	    Side effect: when both ``beds_current`` and ``beds_prev`` are present, a
	    ``bed_change`` column is added to *df* in place, so any sample taken from
	    the frame afterwards includes it.

	    Args:
	        df: Frame loaded from an uploaded CSV file.

	    Returns:
	        A dict that may contain ``type``, ``facility_types``, ``zones`` and
	        ``total_change``; empty when no recognised column is present.
	    """
	    columns = df.columns
	    info: Dict[str, Any] = {}

	    # Facility data: either column is enough to classify the file.
	    if 'facility_name' in columns or 'facility_type' in columns:
	        info['type'] = 'facility_data'
	        if 'facility_type' in columns:
	            info['facility_types'] = df['facility_type'].value_counts().to_dict()

	    has_current = 'beds_current' in columns
	    has_prev = 'beds_prev' in columns

	    # Bed data: checked second, so it overrides 'facility_data' as the type
	    # when a file carries both kinds of columns.
	    if has_current or has_prev:
	        info['type'] = 'bed_data'
	        # NOTE(review): zones are only collected for bed data here; the
	        # nesting was reconstructed from a flattened source - confirm.
	        if 'zone' in columns:
	            info['zones'] = df['zone'].unique().tolist()

	        # The change can only be computed when both snapshots are present.
	        if has_current and has_prev:
	            df['bed_change'] = df['beds_current'] - df['beds_prev']
	            total_change = df['bed_change'].sum()
	            # pandas returns a NumPy scalar, which json.dumps rejects;
	            # store the equivalent native Python number instead.
	            if hasattr(total_change, 'item'):
	                total_change = total_change.item()
	            info['total_change'] = total_change

	    return info


	def extract_text_from_files(file_paths: List[str]) -> Dict[str, Any]:
	    """Extract text and data from uploaded files with healthcare-specific handling.

	    CSV files are loaded with pandas and summarised; ``.pdf``, ``.docx`` and
	    ``.txt`` files are only recorded by name (their content is not read).
	    Extensions are matched case-insensitively. Files with any other extension
	    are skipped without a note.

	    Processing is best-effort per file: a failure on one file is reported as
	    an ``"Error processing ..."`` chunk and the remaining files are still
	    handled.

	    Args:
	        file_paths: Paths of the uploaded files.

	    Returns:
	        A dict with three keys:
	        ``"chunks"`` - list of human-readable text lines,
	        ``"artifacts"`` - list of per-file dicts (``file``, ``type`` and, for
	        CSV files, a ``sample`` of the first three rows as records),
	        ``"healthcare_data"`` - mapping of file name to the healthcare summary
	        for CSV files in which recognised columns were found.
	    """
	    result: Dict[str, Any] = {
	        "chunks": [],
	        "artifacts": [],
	        "healthcare_data": {},
	    }

	    for file_path in file_paths:
	        try:
	            file_name = os.path.basename(file_path)
	            # Compare on a lowered copy so 'DATA.CSV' is treated like 'data.csv'.
	            lowered_name = file_name.lower()

	            if lowered_name.endswith('.csv'):
	                df = pd.read_csv(file_path)

	                # Basic description of the table.
	                result["chunks"].append(f"File: {file_name}")
	                result["chunks"].append(f"Shape: {df.shape}")
	                # str() guards against non-string column labels.
	                result["chunks"].append(
	                    f"Columns: {', '.join(map(str, df.columns))}"
	                )

	                healthcare_info = _summarize_healthcare_columns(df)
	                if healthcare_info:
	                    result["healthcare_data"][file_name] = healthcare_info

	                # Sample is taken after the summary so it carries any
	                # derived 'bed_change' column.
	                result["artifacts"].append({
	                    "file": file_name,
	                    "type": "csv",
	                    "sample": df.head(3).to_dict('records'),
	                })

	            elif lowered_name.endswith(('.pdf', '.docx', '.txt')):
	                # Documents are only noted; no text is extracted from them.
	                result["chunks"].append(f"Document: {file_name}")
	                result["artifacts"].append({
	                    "file": file_name,
	                    "type": "document",
	                })

	        except Exception as e:
	            # Deliberately broad: one unreadable upload must not abort the
	            # batch, so the failure is surfaced as a chunk instead.
	            result["chunks"].append(f"Error processing {file_path}: {e}")

	    return result