# Spaces: Sleeping
import json
import os
import random
import sys
import time
from pathlib import Path

from src.generation.medical_generator import MedicalTextGenerator
# Batch-generate synthetic medical records and save them as one JSON file.
#
# The script picks a random record type for each of NUM_RECORDS attempts, asks
# MedicalTextGenerator for a record of that type, collects the successes and
# writes them to data/synthetic/synthetic_records.json. A failed attempt is
# reported and skipped, so the output may hold fewer than NUM_RECORDS entries.

# How many records to attempt; also used in the progress message so the loop
# bound and the "i/N" counter cannot drift apart.
NUM_RECORDS = 100
# Pause after each successful generation, in seconds (rate-limit courtesy).
SECONDS_BETWEEN_CALLS = 4

# Check for Gemini API key.
# NOTE(review): the key is demanded up front even though every record below is
# requested with use_gemini=False. Presumably MedicalTextGenerator() needs the
# key at construction time -- confirm; if not, this guard can be dropped.
if not os.getenv('GEMINI_API_KEY'):
    print("Please set the GEMINI_API_KEY environment variable:")
    print("Windows PowerShell: $env:GEMINI_API_KEY='your-api-key-here'")
    print("Windows CMD: set GEMINI_API_KEY=your-api-key-here")
    # sys.exit() instead of the bare exit(): the latter is an interactive
    # helper installed by the `site` module and is missing under `python -S`.
    sys.exit(1)

# Ensure the output directory exists.
output_dir = Path("data/synthetic")
output_dir.mkdir(parents=True, exist_ok=True)

# Initialize the generator.
generator = MedicalTextGenerator()

# Record types to sample from. These are passed straight to
# generator.generate_record(); presumably they must match the generator's
# template keys -- verify against MedicalTextGenerator.
record_types = ["clinical_note", "discharge_summary", "lab_report"]

# Generate NUM_RECORDS mixed records.
records = []
for i in range(NUM_RECORDS):
    record_type = random.choice(record_types)

    # Only the generator call is guarded: it is the one step expected to fail,
    # and a failure there should skip this record without aborting the batch.
    try:
        record = generator.generate_record(record_type, use_gemini=False)
    except Exception as e:
        print(f"Error generating record {i+1}: {e}")
    else:
        print(f"Generated record {i+1}/{NUM_RECORDS}: {record_type}")
        records.append({
            "id": i + 1,
            "type": record_type,
            "content": record,
            "generator": "Hugging Face",
            "generated_at": time.strftime("%Y-%m-%d %H:%M:%S"),
        })
        # Respect rate limits between calls; there is no next call after the
        # final record, so do not delay the save for nothing.
        if i + 1 < NUM_RECORDS:
            time.sleep(SECONDS_BETWEEN_CALLS)

# Save records to a JSON file. json.dump escapes non-ASCII by default, so the
# explicit encoding does not change the bytes written; it just removes the
# dependence on the platform's default text encoding.
output_file = output_dir / "synthetic_records.json"
with open(output_file, "w", encoding="utf-8") as f:
    json.dump(records, f, indent=2)

print(f"\nGenerated {len(records)} records and saved to {output_file}")