Spaces:
Sleeping
Sleeping
| import os | |
| import gradio as gr | |
| from llm2vec import LLM2Vec | |
| from transformers import AutoTokenizer, AutoModel, AutoConfig | |
| from peft import PeftModel | |
| import torch | |
# Switch off PyTorch's memory-efficient and flash scaled-dot-product
# attention kernels before any model is loaded.
# NOTE(review): presumably a compatibility workaround for this model —
# confirm the reason before removing.
torch.backends.cuda.enable_mem_efficient_sdp(False)
torch.backends.cuda.enable_flash_sdp(False)

# Both credentials are mandatory: read them from the environment and fail
# fast with a clear message when either one is missing or empty.
GROQ_API_KEY = os.getenv('GROQ_API_KEY')
HF_TOKEN = os.getenv('HF_TOKEN')
if not (GROQ_API_KEY and HF_TOKEN):
    raise ValueError("GROQ_API_KEY and HF_TOKEN must be set as environment variables.")
# Write the validated values back so later library calls see them.
os.environ.update(GROQ_API_KEY=GROQ_API_KEY, HF_TOKEN=HF_TOKEN)

# Hub repositories: the MNTP checkpoint serves as tokenizer, config, base
# weights and first adapter; the SimCSE repository holds the second adapter.
_MNTP_REPO = "McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp"
_SIMCSE_REPO = "McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp-unsup-simcse"

# Tokenizer and configuration (the config needs the repo's custom code).
tokenizer = AutoTokenizer.from_pretrained(_MNTP_REPO)
config = AutoConfig.from_pretrained(_MNTP_REPO, trust_remote_code=True)

# Base model in bfloat16, placed on the GPU when one is available.
model = AutoModel.from_pretrained(
    _MNTP_REPO,
    trust_remote_code=True,
    config=config,
    torch_dtype=torch.bfloat16,
    device_map="cuda" if torch.cuda.is_available() else "cpu",
)

# Apply the MNTP adapter and fold it into the base weights ...
model = PeftModel.from_pretrained(model, _MNTP_REPO)
model = model.merge_and_unload()

# ... then stack the unsupervised SimCSE adapter on top of the merged model.
model = PeftModel.from_pretrained(model, _SIMCSE_REPO)

# LLM2Vec wrapper that handles encoding and mean pooling; max_length is 512.
l2v = LLM2Vec(model, tokenizer, pooling_mode="mean", max_length=512)
def encode_texts(input_texts):
    """Embed one text per line using the module-level ``l2v`` encoder.

    Args:
        input_texts: Either a single string containing one text per line
            (what the Gradio textbox delivers) or an iterable of strings.
            ``None`` is treated as empty input.

    Returns:
        A list with one entry per non-blank text, converted with
        ``.tolist()`` so the JSON output component can serialize it.
        An empty list is returned when there is no non-blank text.
    """
    if input_texts is None:
        return []

    # The textbox hands over ONE string; iterating it directly would walk
    # over individual characters, so split it into lines first.
    if isinstance(input_texts, str):
        input_texts = input_texts.splitlines()

    # Drop blank lines and surrounding whitespace so stray newlines in the
    # textbox do not produce embeddings of empty strings.
    texts = [line.strip() for line in input_texts if line and line.strip()]
    if not texts:
        return []

    # Encode the whole batch in one call with a list of sentences.
    # NOTE(review): assumes l2v.encode returns a tensor/array exposing
    # .tolist() — confirm against the installed llm2vec version.
    embeddings = l2v.encode(texts)
    return embeddings.tolist()
# Gradio UI: a five-line textbox feeds encode_texts and the result is shown
# in a JSON viewer.
_texts_box = gr.Textbox(lines=5, placeholder="Enter texts separated by newlines...")
_json_view = gr.JSON()
iface = gr.Interface(fn=encode_texts, inputs=_texts_box, outputs=_json_view)

# Start the app and request a public share link.
iface.launch(share=True)