| import json |
| import faiss |
| import numpy as np |
| from sentence_transformers import SentenceTransformer |
| from scrape_courses import all_course_details |
|
|
| |
# Sentence-embedding model shared by the whole module; it is instantiated
# once here, at import time, and reused by store_in_faiss().
model = SentenceTransformer("all-MiniLM-L6-v2")
|
|
| |
def store_in_faiss(course_details):
    """Embed course texts and load them into a new FAISS L2 index.

    For every course the ``"title"`` and ``"description"`` values are joined
    with a single space, the resulting texts are encoded with the
    module-level ``model``, and the vectors are added to a fresh
    ``faiss.IndexFlatL2`` in input order.

    Args:
        course_details: Iterable of mappings, each providing ``"title"`` and
            ``"description"`` entries (both are expected to be strings; a
            non-string value raises ``TypeError``). The iterable is consumed
            exactly once, so one-shot iterators are accepted.

    Returns:
        A ``faiss.IndexFlatL2`` holding one vector per course.

    Raises:
        ValueError: If ``course_details`` yields no courses.
        KeyError: If a course lacks ``"title"`` or ``"description"``.
    """
    # Single pass over the input; str.join keeps the fail-loud behaviour for
    # non-string fields instead of silently embedding e.g. "None".
    combined_texts = [
        " ".join((course["title"], course["description"]))
        for course in course_details
    ]

    # With nothing to encode there is no embedding matrix to read the vector
    # dimension from, so fail early with an explicit message instead of an
    # opaque IndexError on ``shape[1]`` further down.
    if not combined_texts:
        raise ValueError("course_details is empty; nothing to index")

    # FAISS consumes float32 data in C-contiguous layout; convert once and
    # avoid the extra copy that np.array(...).astype(...) would make.
    embeddings = np.ascontiguousarray(
        model.encode(combined_texts), dtype="float32"
    )

    # The index dimension is taken from the embedding matrix's second axis.
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)
    return index
|
|
| |
# Build the index from every scraped course. This runs at import time and
# leaves the result available as the module-level name ``faiss_index``.
faiss_index = store_in_faiss(all_course_details)

# Persist the index to disk (path is relative to the current working
# directory), then report completion on stdout.
faiss.write_index(faiss_index, "course_faiss.index")
print("Indexing completed. FAISS index saved to 'course_faiss.index'.")
|
|