| %% LaTeX2e file `references.bib' |
| %% generated by the `filecontents' environment |
| %% from source `template' on 2024/08/08. |
| %% |
| % Deep Learning textbook (MIT Press); each author is listed exactly once. |
| @book{goodfellow2016deep, |
|   author    = {Goodfellow, Ian and Bengio, Yoshua and Courville, Aaron}, |
|   title     = {Deep Learning}, |
|   publisher = {MIT Press}, |
|   year      = {2016}, |
| } |
| |
| % Conference paper: the proceedings title is carried in booktitle. |
| @inproceedings{vaswani2017attention, |
|   author    = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N. and Kaiser, {\L}ukasz and Polosukhin, Illia}, |
|   title     = {Attention Is All You Need}, |
|   booktitle = {Advances in Neural Information Processing Systems}, |
|   volume    = {30}, |
|   year      = {2017}, |
| } |
| |
| % Software repository: cited as misc, with the location in the url field. |
| @misc{karpathy2023nanogpt, |
|   author = {Karpathy, Andrej}, |
|   title  = {{nanoGPT}}, |
|   year   = {2023}, |
|   url    = {https://github.com/karpathy/nanoGPT/tree/master}, |
|   note   = {GitHub repository}, |
| } |
| |
| % arXiv preprint: identifier carried in eprint/archivePrefix, not in a journal field. |
| @misc{kingma2014adam, |
|   author        = {Kingma, Diederik P. and Ba, Jimmy}, |
|   title         = {Adam: A Method for Stochastic Optimization}, |
|   year          = {2014}, |
|   eprint        = {1412.6980}, |
|   archivePrefix = {arXiv}, |
|   url           = {https://arxiv.org/abs/1412.6980}, |
| } |
| |
| % arXiv preprint: identifier carried in eprint/archivePrefix, not in a journal field. |
| @misc{ba2016layer, |
|   author        = {Ba, Jimmy Lei and Kiros, Jamie Ryan and Hinton, Geoffrey E.}, |
|   title         = {Layer Normalization}, |
|   year          = {2016}, |
|   eprint        = {1607.06450}, |
|   archivePrefix = {arXiv}, |
|   url           = {https://arxiv.org/abs/1607.06450}, |
| } |
| |
| % arXiv preprint: identifier carried in eprint/archivePrefix, not in a journal field. |
| @misc{loshchilov2017adamw, |
|   author        = {Loshchilov, Ilya and Hutter, Frank}, |
|   title         = {Decoupled Weight Decay Regularization}, |
|   year          = {2017}, |
|   eprint        = {1711.05101}, |
|   archivePrefix = {arXiv}, |
|   url           = {https://arxiv.org/abs/1711.05101}, |
| } |
| |
| % Report with no journal venue; the issuing organisation is given as institution. |
| @techreport{radford2019language, |
|   author      = {Radford, Alec and Wu, Jeff and Child, Rewon and Luan, David and Amodei, Dario and Sutskever, Ilya}, |
|   title       = {Language Models are Unsupervised Multitask Learners}, |
|   institution = {OpenAI}, |
|   year        = {2019}, |
| } |
| |
| % arXiv preprint: identifier carried in eprint/archivePrefix, not in a journal field. |
| @misc{bahdanau2014neural, |
|   author        = {Bahdanau, Dzmitry and Cho, Kyunghyun and Bengio, Yoshua}, |
|   title         = {Neural Machine Translation by Jointly Learning to Align and Translate}, |
|   year          = {2014}, |
|   eprint        = {1409.0473}, |
|   archivePrefix = {arXiv}, |
|   url           = {https://arxiv.org/abs/1409.0473}, |
| } |
| |
| % Conference paper: the proceedings title is carried in booktitle; the library name is brace-protected. |
| @inproceedings{paszke2019pytorch, |
|   author    = {Paszke, Adam and Gross, Sam and Massa, Francisco and Lerer, Adam and Bradbury, James and Chanan, Gregory and Killeen, Trevor and Lin, Zeming and Gimelshein, Natalia and Antiga, Luca and others}, |
|   title     = {{PyTorch}: An Imperative Style, High-Performance Deep Learning Library}, |
|   booktitle = {Advances in Neural Information Processing Systems}, |
|   volume    = {32}, |
|   year      = {2019}, |
| } |
| |
| % Technical report cited via its arXiv record; corporate author and acronym are brace-protected. |
| @misc{gpt4, |
|   author        = {{OpenAI}}, |
|   title         = {{GPT-4} Technical Report}, |
|   year          = {2024}, |
|   eprint        = {2303.08774}, |
|   archivePrefix = {arXiv}, |
|   primaryClass  = {cs.CL}, |
|   url           = {https://arxiv.org/abs/2303.08774}, |
| } |
| |
| % Conference paper; the arXiv identifier is kept in the eprint fields. |
| @inproceedings{Shen2017StyleTF, |
|   author        = {Shen, Tianxiao and Lei, Tao and Barzilay, Regina and Jaakkola, Tommi}, |
|   title         = {Style Transfer from Non-Parallel Text by Cross-Alignment}, |
|   booktitle     = {Advances in Neural Information Processing Systems}, |
|   year          = {2017}, |
|   eprint        = {1705.09655}, |
|   archivePrefix = {arXiv}, |
|   url           = {https://arxiv.org/abs/1705.09655}, |
| } |
| |
| |
| % Cited as the 2020 arXiv preprint; source metadata also lists the venue |
| % "Conference of the European Chapter of the Association for Computational Linguistics". |
| @misc{Pfeiffer2020AdapterFusionNT, |
|   author        = {Pfeiffer, Jonas and Kamath, Aishwarya and R{\"u}ckl{\'e}, Andreas and Cho, Kyunghyun and Gurevych, Iryna}, |
|   title         = {{AdapterFusion}: Non-Destructive Task Composition for Transfer Learning}, |
|   year          = {2020}, |
|   eprint        = {2005.00247}, |
|   archivePrefix = {arXiv}, |
|   url           = {https://arxiv.org/abs/2005.00247}, |
| } |
| |
| |
| % Course notes published in a proceedings-style collection, so the venue goes in booktitle. |
| @inproceedings{Meng2023SIGGRAPH2C, |
|   author    = {Meng, Chenlin and Song, Jiaming and Li, Shuang and Zhu, Jun-Yan and Ermon, Stefano and Lin, Tsung-Yi and Lin, Chen-Hsuan and Kreis, Karsten}, |
|   title     = {{SIGGRAPH} 2023 Course on Diffusion Models}, |
|   booktitle = {ACM SIGGRAPH 2023 Courses}, |
|   year      = {2023}, |
| } |
| |
| |
| % Single definition of this key: BibTeX reports an error when a key is defined more than once. |
| % arXiv preprint; identifier carried in eprint/archivePrefix. |
| @misc{Keskar2019CTRLAC, |
|   author        = {Keskar, Nitish Shirish and McCann, Bryan and Varshney, Lav R. and Xiong, Caiming and Socher, Richard}, |
|   title         = {{CTRL}: A Conditional Transformer Language Model for Controllable Generation}, |
|   year          = {2019}, |
|   eprint        = {1909.05858}, |
|   archivePrefix = {arXiv}, |
|   url           = {https://arxiv.org/abs/1909.05858}, |
| } |
| |
| |