Rivera Soto, R., Koch, K., Khan, A., Chen, B., Bishop, M., & Andrews, N. (2024). Few-Shot Detection of Machine-Generated Text using Style Representations.
@comment{arXiv preprint. "Rivera Soto" is a compound surname, so it is written in comma form to keep BibTeX from treating "Rivera" as a given name. The note field is required by the unpublished entry type.}
@unpublished{soto2024fewshot,
  author        = {Rivera Soto, Rafael and Koch, Kailin and Khan, Aleem and Chen, Barry and Bishop, Marcus and Andrews, Nicholas},
  title         = {Few-Shot Detection of Machine-Generated Text using Style Representations},
  year          = {2024},
  note          = {Preprint},
  eprint        = {2401.06712},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL}
}
Khan, A., Wang, A., Hager, S., & Andrews, N. (2023). Learning to Generate Text in Arbitrary Writing Styles.
@comment{arXiv preprint. The note field is required by the unpublished entry type; without it classic BibTeX styles emit an empty-field warning.}
@unpublished{khan2023learning,
  author        = {Khan, Aleem and Wang, Andrew and Hager, Sophia and Andrews, Nicholas},
  title         = {Learning to Generate Text in Arbitrary Writing Styles},
  year          = {2023},
  note          = {Preprint},
  eprint        = {2312.17242},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL}
}
Weller, O., Khan, A., Weir, N., Lawrie, D., & Van Durme, B. (2023). Defending Against Misinformation Attacks in Open-Domain Question Answering.
@comment{arXiv preprint. "Van Durme" is a two-word surname; the comma form keeps BibTeX from splitting it into given name "Benjamin Van" and surname "Durme". The note field is required by the unpublished entry type.}
@unpublished{weller2023defending,
  author        = {Weller, Orion and Khan, Aleem and Weir, Nathaniel and Lawrie, Dawn and Van Durme, Benjamin},
  title         = {Defending Against Misinformation Attacks in Open-Domain Question Answering},
  year          = {2023},
  note          = {Preprint},
  eprint        = {2212.10002},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL}
}
Refereed conference proceedings
Rivera-Soto, R. A., Miano, O. E., Ordonez, J., Chen, B. Y., Khan, A., Bishop, M., & Andrews, N. (2021). Learning Universal Authorship Representations. In M.-F. Moens, X. Huang, L. Specia, & S. W.-t. Yih (Eds.), Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing (pp. 913–919). Association for Computational Linguistics. https://aclanthology.org/2021.emnlp-main.70
@inproceedings{rivera-soto-etal-2021-learning,
  author    = {Rivera-Soto, Rafael A. and Miano, Olivia Elizabeth and Ordonez, Juanita and Chen, Barry Y. and Khan, Aleem and Bishop, Marcus and Andrews, Nicholas},
  title     = {Learning Universal Authorship Representations},
  editor    = {Moens, Marie-Francine and Huang, Xuanjing and Specia, Lucia and Yih, Scott Wen-tau},
  booktitle = {Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing},
  year      = {2021},
  month     = nov,
  pages     = {913--919},
  publisher = {Association for Computational Linguistics},
  address   = {Online and Punta Cana, Dominican Republic},
  doi       = {10.18653/v1/2021.emnlp-main.70},
  url       = {https://aclanthology.org/2021.emnlp-main.70}
}
Determining whether two documents were composed by the same author, also known as authorship verification, has traditionally been tackled using statistical methods. Recently, authorship representations learned using neural networks have been found to outperform alternatives, particularly in large-scale settings involving hundreds of thousands of authors. But do such representations learned in a particular domain transfer to other domains? Or are these representations inherently entangled with domain-specific features? To study these questions, we conduct the first large-scale study of cross-domain transfer for authorship verification considering zero-shot transfers involving three disparate domains: Amazon reviews, fanfiction short stories, and Reddit comments. We find that although a surprising degree of transfer is possible between certain domains, it is not so successful between others. We examine properties of these domains that influence generalization and propose simple but effective methods to improve transfer.
Khan, A., Fleming, E., Schofield, N., Bishop, M., & Andrews, N. (2021). A Deep Metric Learning Approach to Account Linking. In K. Toutanova, A. Rumshisky, L. Zettlemoyer, D. Hakkani-Tur, I. Beltagy, S. Bethard, R. Cotterell, T. Chakraborty, & Y. Zhou (Eds.), Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (pp. 5275–5287). Association for Computational Linguistics. https://aclanthology.org/2021.naacl-main.415
@inproceedings{khan-etal-2021-deep,
  author    = {Khan, Aleem and Fleming, Elizabeth and Schofield, Noah and Bishop, Marcus and Andrews, Nicholas},
  title     = {A Deep Metric Learning Approach to Account Linking},
  editor    = {Toutanova, Kristina and Rumshisky, Anna and Zettlemoyer, Luke and Hakkani-Tur, Dilek and Beltagy, Iz and Bethard, Steven and Cotterell, Ryan and Chakraborty, Tanmoy and Zhou, Yichao},
  booktitle = {Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies},
  year      = {2021},
  month     = jun,
  pages     = {5275--5287},
  publisher = {Association for Computational Linguistics},
  address   = {Online},
  doi       = {10.18653/v1/2021.naacl-main.415},
  url       = {https://aclanthology.org/2021.naacl-main.415}
}
We consider the task of linking social media accounts that belong to the same author in an automated fashion on the basis of the content and meta-data of the corresponding document streams. We focus on learning an embedding that maps variable-sized samples of user activity—ranging from single posts to entire months of activity—to a vector space, where samples by the same author map to nearby points. Our approach does not require human-annotated data for training purposes, which allows us to leverage large amounts of social media content. The proposed model outperforms several competitive baselines under a novel evaluation framework modeled after established recognition benchmarks in other domains. Our method achieves high linking accuracy, even with small samples from accounts not seen at training time, a prerequisite for practical applications of the proposed linking framework.