% arXiv preprint. The bare arXiv identifier lives in eprint and the subject
% class in eprintclass, so styles can build a valid arXiv link from them.
@misc{karamiSynEHRgySynthesizingMixedType2024,
  author      = {Karami, Hojjat and Atienza, David and Ionescu, Anisoara},
  title       = {{SynEHRgy}: Synthesizing Mixed-Type Structured Electronic Health Records using Decoder-Only Transformers},
  shorttitle  = {{SynEHRgy}},
  date        = {2024-11-20},
  eprinttype  = {arXiv},
  eprint      = {2411.13428},
  eprintclass = {cs.LG},
  doi         = {10.48550/arXiv.2411.13428},
  url         = {https://arxiv.org/abs/2411.13428},
  urldate     = {2026-06-17},
  publisher   = {{arXiv}},
  number      = {{arXiv}:2411.13428},
  abstract    = {Generating synthetic Electronic Health Records ({EHRs}) offers significant potential for data augmentation, privacy-preserving data sharing, and improving machine learning model training. We propose a novel tokenization strategy tailored for structured {EHR} data, which encompasses diverse data types such as covariates, {ICD} codes, and irregularly sampled time series. Using a {GPT}-like decoder-only transformer model, we demonstrate the generation of high-quality synthetic {EHRs}. Our approach is evaluated using the {MIMIC}-{III} dataset, and we benchmark the fidelity, utility, and privacy of the generated data against state-of-the-art models.},
  keywords    = {Computer Science - Artificial Intelligence, Computer Science - Machine Learning, preprint},
}