Spaces:
Sleeping
Sleeping
| # upload_ingest.py | |
| import pandas as pd | |
| import os | |
| from typing import Dict, List, Any | |
def extract_text_from_files(file_paths: List[str]) -> Dict[str, Any]:
    """Summarize uploaded files, with extra handling for healthcare CSVs.

    CSV files are loaded with pandas; their name, shape and column list are
    appended to ``chunks``, any recognised healthcare columns are summarised
    under ``healthcare_data[file_name]``, and the first three rows are stored
    as an artifact. ``.pdf``/``.docx``/``.txt`` files are only recorded by
    name (their contents are not read). Files with any other extension are
    skipped without a message. Extensions are matched case-insensitively.

    Args:
        file_paths: Paths of the uploaded files to inspect.

    Returns:
        A dict with three keys:
        ``chunks`` (list of human-readable strings),
        ``artifacts`` (list of per-file dicts), and
        ``healthcare_data`` (file name -> summary dict, present only for
        CSVs that contain recognised healthcare columns).

    Processing is best-effort per file: any exception raised while handling
    one file is reported as an ``Error processing ...`` chunk and the
    remaining files are still processed.
    """
    result: Dict[str, Any] = {
        "chunks": [],
        "artifacts": [],
        "healthcare_data": {},
    }

    for file_path in file_paths:
        try:
            file_name = os.path.basename(file_path)
            # Compare on a lowercased copy so "DATA.CSV" is treated like
            # "data.csv"; the original name is still what gets reported.
            lowered_name = file_name.lower()

            if lowered_name.endswith('.csv'):
                df = pd.read_csv(file_path)

                # Basic info is recorded before any derived column is added,
                # so the reported shape/columns describe the file itself.
                result["chunks"].append(f"File: {file_name}")
                result["chunks"].append(f"Shape: {df.shape}")
                # map(str, ...) guards against non-string column labels.
                result["chunks"].append(
                    f"Columns: {', '.join(map(str, df.columns))}"
                )

                healthcare_info = _summarize_healthcare_columns(df)
                if healthcare_info:
                    result["healthcare_data"][file_name] = healthcare_info

                # The sample is taken after the summary step, so it includes
                # the derived 'bed_change' column when that was computed.
                result["artifacts"].append({
                    "file": file_name,
                    "type": "csv",
                    "sample": df.head(3).to_dict('records'),
                })
            elif lowered_name.endswith(('.pdf', '.docx', '.txt')):
                # Documents are only noted by name; no text is extracted here.
                result["chunks"].append(f"Document: {file_name}")
                result["artifacts"].append({
                    "file": file_name,
                    "type": "document",
                })
        except Exception as e:
            # Deliberate per-file boundary: one bad upload must not abort
            # the whole batch, so the failure is surfaced as a chunk.
            result["chunks"].append(f"Error processing {file_path}: {str(e)}")

    return result


def _summarize_healthcare_columns(df: "pd.DataFrame") -> Dict[str, Any]:
    """Build the healthcare summary for one CSV frame (empty if none apply).

    Adds a ``bed_change`` column to ``df`` in place when both
    ``beds_current`` and ``beds_prev`` are present.
    """
    info: Dict[str, Any] = {}
    columns = df.columns

    # Facility data: flagged by either column, counted only by facility_type.
    if 'facility_name' in columns or 'facility_type' in columns:
        info['type'] = 'facility_data'
        if 'facility_type' in columns:
            info['facility_types'] = df['facility_type'].value_counts().to_dict()

    # Bed data: when a file has both kinds of columns, 'bed_data' replaces
    # the 'facility_data' type set above (facility_types is still kept).
    if 'beds_current' in columns or 'beds_prev' in columns:
        info['type'] = 'bed_data'
        if 'zone' in columns:
            info['zones'] = df['zone'].unique().tolist()
        if 'beds_current' in columns and 'beds_prev' in columns:
            df['bed_change'] = df['beds_current'] - df['beds_prev']
            total_change = df['bed_change'].sum()
            # Series.sum() yields a NumPy scalar, which json.dumps rejects;
            # .item() unwraps it to the equivalent built-in int/float.
            if hasattr(total_change, 'item'):
                total_change = total_change.item()
            info['total_change'] = total_change

    return info