---
license: apache-2.0
language:
- en
- zh
- ru
- es
- fr
- de
- ar
- nl
- vi
- hi
- ko
- ja
- it
- id
- pt
- pl
- tr
- da
- th
- sv
- fa
- uk
- cs
- 'no'
- el
- ca
- ro
- fi
- bg
- tl
- gl
- my
- hy
- km
- ne
- hu
- eu
- he
- lo
- sw
- az
- lv
- si
- sk
- tg
- et
- lt
- ms
- hr
- is
- sl
- sr
- ur
- bn
- af
- ta
- ka
- te
- ml
- mn
- nn
- kk
- cy
- mr
- sq
- nb
- mk
- jv
- kn
- eo
- la
- gu
- uz
- am
- oc
- be
- mg
- vo
- pa
- lb
- ht
- br
- ga
- xh
- tt
- bs
- yo
base_model:
- codefuse-ai/F2LLM-v2-0.6B-Preview
pipeline_tag: feature-extraction
library_name: transformers
tags:
- sentence-transformers
datasets:
- codefuse-ai/F2LLM-v2
---

# F2LLM-v2-80M

F2LLM-v2 is a family of general-purpose, multilingual embedding models in 8 distinct sizes ranging from 80M to 14B. Trained on a curated composite of 60 million publicly available high-quality data samples, F2LLM-v2 supports more than 200 languages, with a particular emphasis on previously underserved mid- and low-resource languages.

## Usage

### With Sentence Transformers

To encode text with the [Sentence Transformers](https://www.sbert.net/) library:

```python
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("codefuse-ai/F2LLM-v2-80M", device="cuda:0", model_kwargs={"torch_dtype": "bfloat16"})

# Some sample query and documents
query = "What is F2LLM used for?"
documents = [
    'We present F2LLM, a family of fully open embedding LLMs that achieve a strong balance between model size, training data, and embedding performance.',
    'F2LLM is a model for computing text embeddings that can be used for various NLP tasks such as information retrieval, semantic search, and text classification.',
    'F2LLM 是 CodeFuse 开源的系列嵌入模型。',
    'F2LLM — это модель вычисления встраивания текста, которую можно использовать для различных задач НЛП, таких как поиск информации, семантический поиск и классификация текста.'
]

# Encode the query and documents separately.
# The encode_query method uses the query prompt.
query_embedding = model.encode_query(query)
document_embeddings = model.encode_document(documents)
print(query_embedding.shape, document_embeddings.shape)
# (320,) (4, 320)

# Compute cosine similarity between the query and documents
similarity = model.similarity(query_embedding, document_embeddings)
print(similarity)
# tensor([[0.6968, 0.7818, 0.7165, 0.8374]])
```

### With Transformers

Or directly with the [Transformers](https://huggingface.co/docs/transformers/index) library:

```python
from transformers import AutoModel, AutoTokenizer
import torch
import torch.nn.functional as F

model_path = "codefuse-ai/F2LLM-v2-80M"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModel.from_pretrained(model_path, torch_dtype=torch.bfloat16, device_map={'': 0})

query = "What is F2LLM used for?"
query_prompt = "Instruct: Given a question, retrieve passages that can help answer the question.\nQuery: "
documents = [
    'We present F2LLM, a family of fully open embedding LLMs that achieve a strong balance between model size, training data, and embedding performance.',
    'F2LLM is a model for computing text embeddings that can be used for various NLP tasks such as information retrieval, semantic search, and text classification.',
    'F2LLM 是 CodeFuse 开源的系列嵌入模型。',
    'F2LLM — это модель вычисления встраивания текста, которую можно использовать для различных задач НЛП, таких как поиск информации, семантический поиск и классификация текста.'
]

def encode(sentences):
    batch_size = len(sentences)
    # the tokenizer will automatically add eos token
    tokenized_inputs = tokenizer(sentences, padding=True, return_tensors='pt').to(model.device)
    last_hidden_state = model(**tokenized_inputs).last_hidden_state
    eos_positions = tokenized_inputs.attention_mask.sum(dim=1) - 1
    embeddings = last_hidden_state[torch.arange(batch_size, device=model.device), eos_positions]
    embeddings = F.normalize(embeddings, p=2, dim=1)
    return embeddings

# Encode the query and documents
query_embedding = encode([query_prompt + query])
document_embeddings = encode(documents)
print(query_embedding.shape, document_embeddings.shape)
# torch.Size([1, 320]) torch.Size([4, 320])

# Compute cosine similarity between the query and documents
similarity = query_embedding @ document_embeddings.T
print(similarity)
# tensor([[0.6914, 0.7812, 0.7148, 0.8359]], device='cuda:0',
#        dtype=torch.bfloat16, grad_fn=<MmBackward0>)
```