% arXiv preprint: the identifier is stored in eprint/archiveprefix (not in a journal field) so styles can format it.
@misc{ferrand_targeting_2025,
  author        = {Noirot Ferrand, Jean-Charles and Beugin, Yohan and Pauley, Eric and Sheatsley, Ryan and McDaniel, Patrick},
  title         = {Targeting Alignment: Extracting Safety Classifiers of Aligned {LLMs}},
  year          = {2025},
  eprint        = {2501.16534},
  archiveprefix = {arXiv},
  doi           = {10.48550/arXiv.2501.16534},
}