from datasets import load_dataset import pandas as pd import os def fetch_med_dialog_sample(limit=5): """ Fetches a small sample from the MedDialog dataset to use as context. """ print(f"Fetching {limit} dialogues from MedDialog...") try: # Loading a small portion of the English MedDialog dataset dataset = load_dataset("OpenMed/MedDialog", split="train", streaming=True) samples = [] for i, entry in enumerate(dataset): if i >= limit: break samples.append(entry) return samples except Exception as e: print(f"Error fetching dataset: {e}") return [] def format_dialogue_context(samples): context = "Here are some examples of medical dialogues for reference:\n\n" for sample in samples: # Adjust based on actual dataset structure (checking common fields) dialogue = sample.get('utterances', sample.get('description', '')) context += f"- {dialogue}\n" return context if __name__ == "__main__": # Test fetch data = fetch_med_dialog_sample(2) print(format_dialogue_context(data))