Shubham170793 committed on
Commit
4d99eea
·
verified ·
1 Parent(s): b24ad85

Create embeddings.py

Browse files
Files changed (1) hide show
  1. src/embeddings.py +37 -0
src/embeddings.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sentence_transformers import SentenceTransformer # 📌 library
2
+ import numpy as np # 📌 library (for math)
3
+
4
+ # 📌 variable: load a free embedding model (runs locally in Codespaces)
5
+ model = SentenceTransformer('all-MiniLM-L6-v2')
6
+
7
def generate_embeddings(chunks: list) -> list:
    """
    Generate embeddings for a list of text chunks.

    Encoding is delegated to the module-level ``model`` (a
    SentenceTransformer instance), and the resulting numpy array is
    converted to plain Python lists.

    Args:
        chunks (list): List of text chunks.

    Returns:
        list: List of embedding vectors (one per chunk). An empty list or
            tuple of chunks yields an empty list.
    """
    # Nothing to encode: return early so an empty batch never reaches the model.
    # Restricted to list/tuple so other input types keep their original behavior.
    if isinstance(chunks, (list, tuple)) and not chunks:
        return []

    embeddings = model.encode(chunks, convert_to_numpy=True)  # numpy array
    return embeddings.tolist()  # convert to plain Python list
19
+
20
+
21
+ # ----------------------------
22
+ # OLD OPENAI EMBEDDINGS CODE
23
+ # (kept for reference only)
24
+ # ----------------------------
25
+
26
+ # from openai import OpenAI # 📌 library + class
27
+ # client = OpenAI() # 📌 variable (needs API key in env)
28
+
29
+ # def generate_embeddings(chunks: list, model: str = "text-embedding-3-small") -> list:
30
+ # embeddings = [] # 📌 variable: holds all vectors
31
+ # for chunk in chunks:
32
+ # response = client.embeddings.create( # 📌 function (method) call
33
+ # input=chunk,
34
+ # model=model
35
+ # )
36
+ # embeddings.append(response.data[0].embedding) # vector = list of floats
37
+ # return embeddings