Spaces:

Abhijit-192-168-1-1
/

example_LLM2Vec

Sleeping

App Files Files Community

example_LLM2Vec / app.py

Abhijit-192-168-1-1

modified app.py

498d36d over 1 year ago

raw

history blame contribute delete

1.69 kB

	import os
	import gradio as gr
	from llm2vec import LLM2Vec
	from transformers import AutoTokenizer, AutoModel, AutoConfig
	from peft import PeftModel
	import torch

	# Switch off the memory-efficient and flash scaled-dot-product-attention
	# backends for the whole process before any model is loaded.
	# NOTE(review): presumably needed so attention falls back to a kernel that
	# works with this model's attention setup -- confirm against LLM2Vec docs.
	for _set_sdp_backend in (
	    torch.backends.cuda.enable_mem_efficient_sdp,
	    torch.backends.cuda.enable_flash_sdp,
	):
	    _set_sdp_backend(False)

	# Read the required secrets from the environment and fail fast, at import
	# time, when either one is missing or empty.
	# NOTE(review): GROQ_API_KEY is validated here but nothing else in the
	# visible script reads it -- confirm it is still required.
	GROQ_API_KEY = os.getenv('GROQ_API_KEY')
	HF_TOKEN = os.getenv('HF_TOKEN')

	if not (GROQ_API_KEY and HF_TOKEN):
	    raise ValueError("GROQ_API_KEY and HF_TOKEN must be set as environment variables.")

	# Both values came straight from os.environ, so they are already visible to
	# any library that looks them up there; writing them back would be a no-op.

	# Hugging Face Hub repositories used below: the MNTP checkpoint (tokenizer,
	# config, base weights and first adapter) and the unsupervised SimCSE adapter.
	_MNTP_REPO = "McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp"
	_SIMCSE_REPO = "McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp-unsup-simcse"

	# Put the model on the GPU when CUDA is available, otherwise on the CPU.
	_target_device = "cuda" if torch.cuda.is_available() else "cpu"

	# trust_remote_code=True lets Transformers run model code shipped in the repo.
	tokenizer = AutoTokenizer.from_pretrained(_MNTP_REPO)
	config = AutoConfig.from_pretrained(_MNTP_REPO, trust_remote_code=True)
	model = AutoModel.from_pretrained(
	    _MNTP_REPO,
	    trust_remote_code=True,
	    config=config,
	    torch_dtype=torch.bfloat16,
	    device_map=_target_device,
	)

	# Load the adapter stored in the MNTP repo and merge it into the base model.
	model = PeftModel.from_pretrained(model, _MNTP_REPO).merge_and_unload()

	# Load the unsupervised SimCSE adapter on top of the merged model (left unmerged).
	model = PeftModel.from_pretrained(model, _SIMCSE_REPO)

	# Wrapper that handles encoding and mean pooling, with inputs capped at
	# max_length=512.
	l2v = LLM2Vec(model, tokenizer, pooling_mode="mean", max_length=512)

	def encode_texts(input_texts):
	    """Embed the given texts and return the result as JSON-friendly lists.

	    Args:
	        input_texts: Either a single string holding newline-separated texts
	            (what the Gradio textbox passes in) or an iterable of texts.
	            ``None`` is treated as empty input.

	    Returns:
	        The encoder output converted to nested Python lists; an empty list
	        when there is nothing to encode. Assumes ``l2v.encode`` yields one
	        embedding row per input text -- TODO confirm against LLM2Vec docs.
	    """
	    if input_texts is None:
	        return []

	    # A bare string would otherwise be iterated character by character, so
	    # split it into the individual lines the placeholder text promises.
	    if isinstance(input_texts, str):
	        input_texts = input_texts.splitlines()

	    # Drop blank strings; non-string items are passed through untouched.
	    texts = [item for item in input_texts if not isinstance(item, str) or item.strip()]
	    if not texts:
	        return []

	    embeddings = l2v.encode(texts)

	    # Tensors/arrays are not plain JSON types; convert them to nested lists
	    # so the JSON output component can render the numbers.
	    if hasattr(embeddings, "tolist"):
	        return embeddings.tolist()
	    return list(embeddings)

	# Build the UI: a five-line textbox feeds encode_texts and the result is
	# shown in a JSON component.
	_text_input = gr.Textbox(lines=5, placeholder="Enter texts separated by newlines...")
	_json_output = gr.JSON()
	iface = gr.Interface(fn=encode_texts, inputs=_text_input, outputs=_json_output)

	# Start the app; share=True additionally requests a public share link.
	iface.launch(share=True)