Spaces:
Sleeping
Sleeping
fix(ui): remove experimental webgpu tab and switch semantic cache back to stable numpy arrays
0e0459c | import os | |
| import json | |
| import numpy as np | |
| import logging | |
| from sentence_transformers import SentenceTransformer | |
# Emit INFO-level progress messages while the script runs.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Sentence-transformer checkpoint used to embed the sections.
MODEL_NAME = "i-dot-ai/all-miniLM-L6-v2-UKPGA-6k-finetune"

# Both the input JSON and the output .npy are resolved relative to the
# directory that contains this script.
CACHE_FILE = os.path.join(
    os.path.dirname(__file__),
    "nursing_sections.json",
)
EMBEDDINGS_FILE = os.path.join(
    os.path.dirname(__file__),
    "nursing_sections_embeddings.npy",
)
def build():
    """Embed every cached section and write the embeddings to disk.

    Loads the section records from ``CACHE_FILE`` (JSON), renders each
    record into one text string, encodes all strings with the
    ``MODEL_NAME`` sentence-transformer, and stores the resulting NumPy
    array in ``EMBEDDINGS_FILE`` via ``np.save``.
    """

    def render(record):
        # Build the exact text that gets embedded for one section record.
        legislation_id = record.get("legislation_id", "")
        # When the id contains slashes, use its second-to-last segment as
        # the act label; otherwise use the id unchanged.
        # NOTE(review): confirm that segment is the intended act identifier.
        if "/" in legislation_id:
            act = legislation_id.split("/")[-2]
        else:
            act = legislation_id
        number = record.get("number", "")
        title = record.get("title", "")
        body = record.get("text", "")
        return f"Act: {act}. Section {number}: {title}. {body}"

    logger.info("Loading model...")
    encoder = SentenceTransformer(MODEL_NAME)

    logger.info("Loading sections...")
    with open(CACHE_FILE, "r", encoding="utf-8") as handle:
        records = json.load(handle)

    # Texts are rendered in the same order as the records in the JSON file.
    texts = [render(record) for record in records]

    logger.info(f"Encoding {len(texts)} sections...")
    # Ask for a NumPy array rather than tensors so it can go to np.save.
    vectors = encoder.encode(
        texts,
        convert_to_numpy=True,
        show_progress_bar=True,
    )

    logger.info("Saving numpy embeddings to file...")
    np.save(EMBEDDINGS_FILE, vectors)
    logger.info("Done!")
# Build the embeddings only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    build()