File size: 1,361 Bytes
4d99eea
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
from sentence_transformers import SentenceTransformer   # 📌 library
import numpy as np                                      # 📌 library (for math)

# 📌 variable: load a free embedding model (runs locally in Codespaces).
# The model is constructed once, at import time, and the same instance is
# reused by every generate_embeddings() call in this module.
model = SentenceTransformer('all-MiniLM-L6-v2')

def generate_embeddings(chunks: list) -> list:
    """
    📌 Function: Generate embeddings for a list of text chunks using the
    module-level sentence-transformers model.

    Args:
        chunks (list): List of text chunks (presumably strings; the element
            type is not validated here).

    Returns:
        list: List of embedding vectors (one per chunk), converted from the
            numpy array returned by the model into plain Python lists.
            An empty list (or tuple) of chunks yields an empty list.
    """
    # Nothing to encode: return early instead of invoking the model on an
    # empty batch. Restricted to list/tuple so other input kinds keep the
    # exact behaviour of model.encode().
    if isinstance(chunks, (list, tuple)) and not chunks:
        return []

    embeddings = model.encode(chunks, convert_to_numpy=True)   # numpy array
    return embeddings.tolist()   # convert to plain Python list


# ----------------------------
# OLD OPENAI EMBEDDINGS CODE
# (kept for reference only)
# ----------------------------

# from openai import OpenAI   # 📌 library + class
# client = OpenAI()           # 📌 variable (needs API key in env)

# def generate_embeddings(chunks: list, model: str = "text-embedding-3-small") -> list:
#     embeddings = []   # 📌 variable: holds all vectors
#     for chunk in chunks:
#         response = client.embeddings.create(   # 📌 function (method) call
#             input=chunk,
#             model=model
#         )
#         embeddings.append(response.data[0].embedding)  # vector = list of floats
#     return embeddings