% BibTeX bibliography database. Text outside entries is ignored by BibTeX.
% BERTopic (Grootendorst, 2022), arXiv preprint 2203.05794.
% Typed as misc with eprint fields: an arXiv identifier is not a journal name.
@misc{grootendorst2022bertopic,
  author        = {Grootendorst, Maarten},
  title         = {{BERTopic}: Neural topic modeling with a class-based {TF-IDF} procedure},
  year          = {2022},
  eprint        = {2203.05794},
  archiveprefix = {arXiv},
  doi           = {10.48550/arXiv.2203.05794}
}
% Sentence-BERT (Reimers and Gurevych, 2019), EMNLP-IJCNLP proceedings paper.
@inproceedings{reimers2019sentence,
  author    = {Reimers, Nils and Gurevych, Iryna},
  title     = {Sentence-{BERT}: Sentence Embeddings using Siamese {BERT}-Networks},
  booktitle = {Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing ({EMNLP-IJCNLP})},
  publisher = {Association for Computational Linguistics},
  pages     = {3982--3992},
  year      = {2019},
  doi       = {10.18653/v1/D19-1410}
}
% Rehurek and Sojka (2010), LREC 2010 workshop paper.
% Accented letters are written as BibTeX special characters ({\v{R}} etc.) so
% that classic 8-bit BibTeX sorts and labels the surname correctly.
@inproceedings{rehurek2010gensim,
  author    = {{\v{R}}eh{\r{u}}{\v{r}}ek, Radim and Sojka, Petr},
  title     = {Software framework for topic modelling with large corpora},
  booktitle = {Proceedings of the {LREC} 2010 Workshop on New Challenges for {NLP} Frameworks},
  pages     = {45--50},
  year      = {2010}
}
% MTEB benchmark paper (Muennighoff et al., EACL 2023).
% The address field holds the conference location, as in the original entry.
@inproceedings{muennighoff-etal-2023-mteb,
  author    = {Muennighoff, Niklas and
               Tazi, Nouamane and
               Magne, Loic and
               Reimers, Nils},
  title     = {{MTEB}: Massive Text Embedding Benchmark},
  editor    = {Vlachos, Andreas and
               Augenstein, Isabelle},
  booktitle = {Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics},
  month     = may,
  year      = {2023},
  address   = {Dubrovnik, Croatia},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2023.eacl-main.148/},
  doi       = {10.18653/v1/2023.eacl-main.148},
  pages     = {2014--2037},
  abstract  = {Text embeddings are commonly evaluated on a small set of datasets from a single task not covering their possible applications to other tasks. It is unclear whether state-of-the-art embeddings on semantic textual similarity (STS) can be equally well applied to other tasks like clustering or reranking. This makes progress in the field difficult to track, as various models are constantly being proposed without proper evaluation. To solve this problem, we introduce the Massive Text Embedding Benchmark (MTEB). MTEB spans 8 embedding tasks covering a total of 58 datasets and 112 languages. Through the benchmarking of 33 models on MTEB, we establish the most comprehensive benchmark of text embeddings to date. We find that no particular text embedding method dominates across all tasks. This suggests that the field has yet to converge on a universal text embedding method and scale it up sufficiently to provide state-of-the-art results on all embedding tasks. MTEB comes with open-source code and a public leaderboard at \url{https://github.com/embeddings-benchmark/mteb}.}
}
% UMAP (McInnes, Healy and Melville, 2018), arXiv preprint 1802.03426.
% Typed as misc with eprint fields: an arXiv identifier is not a journal name.
@misc{mcinnes2018umap,
  author        = {McInnes, Leland and Healy, John and Melville, James},
  title         = {{UMAP}: Uniform Manifold Approximation and Projection for Dimension Reduction},
  year          = {2018},
  eprint        = {1802.03426},
  archiveprefix = {arXiv},
  doi           = {10.48550/arXiv.1802.03426}
}
% Campello, Moulavi and Sander (2013), conference paper on hierarchical
% density-based clustering.
% Initials are separated ("J. G. B.") so BibTeX parses each one as its own
% name token instead of a single given name "JGB".
@inproceedings{campello2013density,
  author    = {Campello, Ricardo J. G. B. and Moulavi, Davoud and Sander, J{\"o}rg},
  title     = {Density-based clustering based on hierarchical density estimates},
  booktitle = {Pacific-Asia Conference on Knowledge Discovery and Data Mining},
  publisher = {Springer},
  pages     = {160--172},
  year      = {2013},
  doi       = {10.1007/978-3-642-37456-2_14}
}
% hdbscan software paper (McInnes, Healy and Astels, 2017).
% The lowercase package name is brace-protected so styles cannot recase it.
@article{mcinnes2017hdbscan,
  author  = {McInnes, Leland and Healy, John and Astels, Steve},
  title   = {{hdbscan}: Hierarchical density based clustering},
  journal = {Journal of Open Source Software},
  volume  = {2},
  number  = {11},
  pages   = {205},
  year    = {2017},
  doi     = {10.21105/joss.00205}
}
% Studerus, Gamma and Vollenweider (2010), psychometric evaluation of the OAV
% rating scale.
@article{studerus2010psychometric,
  author    = {Studerus, Erich and Gamma, Alex and Vollenweider, Franz X.},
  title     = {Psychometric evaluation of the altered states of consciousness rating scale ({OAV})},
  journal   = {PLoS ONE},
  publisher = {Public Library of Science},
  volume    = {5},
  number    = {8},
  pages     = {e12412},
  year      = {2010},
  doi       = {10.1371/journal.pone.0012412}
}
% Pekala (1991), book: Quantifying Consciousness.
@book{pekala1991quantifying,
  author    = {Pekala, Ronald J.},
  title     = {Quantifying Consciousness: An Empirical Approach},
  publisher = {Plenum Press},
  address   = {New York},
  year      = {1991},
  doi       = {10.1007/978-1-4899-0629-8}
}
% Gamma and Metzinger (2021), MPE-92M questionnaire paper.
% The title uses TeX quotes (``...'') rather than straight double quotes, and
% protects whole words rather than single letters.
% NOTE(review): the DOI / e-locator (e0313118) looks inconsistent with year
% 2021, volume 16 -- verify against the publisher record before relying on it.
@article{gamma2021mpe,
  author    = {Gamma, Alex and Metzinger, Thomas},
  title     = {The {Minimal} {Phenomenal} {Experience} questionnaire ({MPE-92M}): Towards a phenomenological profile of ``pure awareness'' experiences in meditators},
  journal   = {PLoS ONE},
  publisher = {Public Library of Science},
  volume    = {16},
  number    = {12},
  pages     = {e0313118},
  year      = {2021},
  doi       = {10.1371/journal.pone.0313118}
}
% Blei, Ng and Jordan (2003), Latent Dirichlet Allocation.
% The proper noun is protected as a whole word ({Dirichlet}), not by a
% mid-word single-letter brace.
@article{blei2003latent,
  author  = {Blei, David M. and Ng, Andrew Y. and Jordan, Michael I.},
  title   = {Latent {Dirichlet} Allocation},
  journal = {Journal of Machine Learning Research},
  volume  = {3},
  pages   = {993--1022},
  year    = {2003}
}
% MOSAIC (Beaute et al., 2025), arXiv preprint 2502.18318.
% Typed as misc with eprint fields: an arXiv identifier is not a journal name.
% The accented surname is written as a BibTeX special character ({\'e}).
@misc{beaute2025mosaic,
  author        = {Beaut{\'e}, Romy and Schwartzman, David J. and Dumas, Guillaume and Crook, Jennifer and Macpherson, Fiona and Barrett, Adam B. and Seth, Anil K.},
  title         = {Mapping of Subjective Accounts into Interpreted Clusters ({MOSAIC}): Topic Modelling and {LLM} Applied to Stroboscopic Phenomenology},
  year          = {2025},
  eprint        = {2502.18318},
  archiveprefix = {arXiv},
  doi           = {10.48550/arXiv.2502.18318}
}
% Bird, Klein and Loper (2009), book: Natural Language Processing with Python.
% The language name is protected as a whole word ({Python}).
@book{bird2009natural,
  author    = {Bird, Steven and Klein, Ewan and Loper, Edward},
  title     = {Natural Language Processing with {Python}},
  publisher = {O'Reilly Media},
  address   = {Sebastopol, CA},
  year      = {2009}
}
% Optuna (Akiba et al., 2019), ACM SIGKDD proceedings paper.
@inproceedings{akiba2019optuna,
  author    = {Akiba, Takuya and Sano, Shotaro and Yanase, Toshihiko and Ohta, Takeru and Koyama, Masanori},
  title     = {Optuna: A Next-generation Hyperparameter Optimization Framework},
  booktitle = {Proceedings of the 25th {ACM} {SIGKDD} International Conference on Knowledge Discovery and Data Mining},
  pages     = {2623--2631},
  year      = {2019},
  doi       = {10.1145/3292500.3330701}
}