% Etourneau et al. (2025), Biostatistics 26(1): the Pirat imputation algorithm
% for censored missing values in label-free proteomics.
@article{etourneau_penalized_2025,
  author       = {Etourneau, Lucas and Fancello, Laura and Wieczorek, Samuel and Varoquaux, Nelle and Burger, Thomas},
  title        = {Penalized likelihood optimization for censored missing value imputation in proteomics},
  journaltitle = {Biostatistics},
  shortjournal = {Biostatistics},
  date         = {2025-01-01},
  volume       = {26},
  number       = {1},
  pages        = {kxaf006},
  issn         = {1465-4644},
  doi          = {10.1093/biostatistics/kxaf006},
  url          = {https://doi.org/10.1093/biostatistics/kxaf006},
  urldate      = {2025-12-02},
  abstract     = {Label-free bottom-up proteomics using mass spectrometry and liquid chromatography has long been established as one of the most popular high-throughput analysis workflows for proteome characterization. However, it produces data hindered by complex and heterogeneous missing values, which imputation has long remained problematic. To cope with this, we introduce Pirat, an algorithm that harnesses this challenge using an original likelihood maximization strategy. Notably, it models the instrument limit by learning a global censoring mechanism from the data available. Moreover, it estimates the covariance matrix between enzymatic cleavage products (ie peptides or precursor ions), while offering a natural way to integrate complementary transcriptomic information when multi-omic assays are available. Our benchmarking on several datasets covering a variety of experimental designs (number of samples, acquisition mode, missingness patterns, etc.) and using a variety of metrics (differential analysis ground truth or imputation errors) shows that Pirat outperforms all pre-existing imputation methods. Beyond the interest of Pirat as an imputation tool, these results pinpoint the need for a paradigm change in proteomics imputation, as most pre-existing strategies could be boosted by incorporating similar models to account for the instrument censorship or for the correlation structures, either grounded to the analytical pipeline or arising from a multi-omic approach.},
}

% Chen, Prentice and Wang (2014), Biometrics 70(2): PEMM, a penalized EM
% algorithm that models the missing-data mechanism for Gaussian mean and
% covariance estimation.
% NOTE(review): the `file` and `mendeley-groups` fields are reference-manager
% metadata kept exactly as exported; the `file` paths are machine-specific.
@article{Chen2014,
  abstract        = {Missing data rates could depend on the targeted values in many settings, including mass spectrometry-based proteomic profiling studies. Here, we consider mean and covariance estimation under a multivariate Gaussian distribution with non-ignorable missingness, including scenarios in which the dimension (p) of the response vector is equal to or greater than the number (n) of independent observations. A parameter estimation procedure is developed by maximizing a class of penalized likelihood functions that entails explicit modeling of missing data probabilities. The performance of the resulting "penalized EM algorithm incorporating missing data mechanism (PEMM)" estimation procedure is evaluated in simulation studies and in a proteomic data illustration. {\textcopyright} 2014, The International Biometric Society.},
  author          = {Chen, Lin S. and Prentice, Ross L. and Wang, Pei},
  doi             = {10.1111/BIOM.12149},
  file            = {:C\:/Users/DELL/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Chen, Prentice, Wang - 2014 - A penalized EM algorithm incorporating missing data mechanism for Gaussian parameter estimation.pdf:pdf;:C\:/Users/DELL/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Chen, Prentice, Wang - 2014 - A penalized EM algorithm incorporating missing data mechanism for Gaussian parameter estimation(2).pdf:pdf},
  issn            = {1541-0420},
  journal         = {Biometrics},
  keywords        = {Expectation-maximization (EM) algorithm,Maximum penalized likelihood estimate,Not-missing-at-random (NMAR)},
  mendeley-groups = {multi-omics,Papers bib/ECML 2022,only proteomic imputation,imputation},
  month           = jun,
  number          = {2},
  pages           = {312--322},
  publisher       = {John Wiley \& Sons, Ltd},
  title           = {A penalized {EM} algorithm incorporating missing data mechanism for {Gaussian} parameter estimation},
  url             = {https://onlinelibrary.wiley.com/doi/10.1111/biom.12149},
  volume          = {70},
  year            = {2014}
}

