"""Llama 2 Inference.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1tS9ep-M5slbxKrGP2btamFUhMM00QkKt

# Fine-tune Llama 2 in Google Colab
> 🗣️ Large Language Model Course

❤️ Created by [@maximelabonne](https://twitter.com/maximelabonne), based on Younes Belkada's [GitHub Gist](https://gist.github.com/younesbelkada/9f7f75c94bdc1981c8ca5cc937d4a4da). Special thanks to Tolga HOŞGÖR for his solution to empty the VRAM.

This notebook runs on a T4 GPU. (Last update: 24 Aug 2023)
"""
|
|
# NOTE(review): `!` is IPython/Colab shell magic — this line only works inside a
# notebook kernel, not as plain Python. Versions are pinned to the stack the
# notebook was last validated against (24 Aug 2023).
!pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 transformers==4.31.0 trl==0.4.7
|
|
import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig, PeftModel
from trl import SFTTrainer
# NOTE(review): several imports (load_dataset, BitsAndBytesConfig,
# HfArgumentParser, TrainingArguments, LoraConfig, PeftModel, SFTTrainer,
# os, torch) are not used in the inference code visible below — presumably
# leftovers from the fine-tuning notebook this was exported from. Kept as-is
# in case later cells (not shown here) rely on them.
|
|
# Load the fine-tuned checkpoint used for inference.
model = AutoModelForCausalLM.from_pretrained("tminh/llama-2-7b-glenda")

# NOTE(review): the tokenizer is pulled from a different repo than the model —
# presumably the sharded base model the checkpoint was fine-tuned from; confirm
# the vocabularies actually match.
model_name = "TinyPixel/Llama-2-7B-bf16-sharded"
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True,
)
|
|
| |
# Silence transformers' warning/progress chatter during generation.
logging.set_verbosity(logging.CRITICAL)

# Ask a question using Llama 2's [INST] chat template and print the output.
prompt = "What can drug D07OAC do?"
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=200,
)
result = pipe("<s>[INST] " + prompt + " [/INST]")
# The pipeline returns a list of dicts; "generated_text" includes the prompt
# followed by the model's completion.
print(result[0]["generated_text"])
|
|
|
|