# Example: run an X-LoRA adapter model with mistral.rs and send a chat request.
# Replace the `...` placeholders with real model IDs / paths before running.
from mistralrs import ChatCompletionRequest, Runner, Which

runner = Runner(
    which=Which.XLora(
        tok_model_id=None,  # Automatically determine from ordering file
        model_id=...,  # Model ID of the base model (local path or HF model ID)
        xlora_model_id=...,  # X-LoRA Model ID of the base model (local path or HF model ID)
        order=...,  # Ordering file to ensure compatibility with PEFT
        tgt_non_granular_index=3,  # Only generate scalings for the first 3 decoding tokens, and then use the last generated one
    )
)

# Send a single chat completion request and print the reply plus token usage.
res = runner.send_chat_completion_request(
    ChatCompletionRequest(
        model="mistral",
        messages=[{"role": "user", "content": "Tell me a story about 2 low rank matrices."}],
        max_tokens=256,
        presence_penalty=1.0,
        top_p=0.1,
        temperature=0.5,
    )
)
print(res.choices[0].message.content)
print(res.usage)