Upload use.py

462502d verified 3 days ago

4.76 kB

	# Import required libraries
	import torch
	from transformers import AutoModelForCausalLM, AutoTokenizer

	# Configuration
	# Local Hugging Face model folder the model and tokenizer are loaded from.
	MODEL_PATH = "VDrontV2-mini-fp16/"
	# Sampling temperature (lower = more deterministic).
	TEMPERATURE = 0.5
	# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
	DEVICE = "cuda" if torch.cuda.is_available() else "cpu"


	def load_model_and_tokenizer(model_path: str):
	    """
	    Load a causal language model and its tokenizer from a local folder.

	    If the tokenizer defines no pad token, its EOS token is reused as the
	    pad token.

	    Args:
	        model_path (str): Path to the local model directory.

	    Returns:
	        model, tokenizer: Loaded model and tokenizer instances.
	    """
	    print(f"Loading model from {model_path}...")
	    print(f"Using device: {DEVICE}")

	    # Tokenizer first; remote code execution stays disabled for both loads.
	    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=False)
	    if tokenizer.pad_token is None:
	        # Fall back to the EOS token when no pad token is defined.
	        tokenizer.pad_token = tokenizer.eos_token

	    # Half precision when DEVICE is "cuda", full precision otherwise.
	    weights_dtype = torch.float16 if DEVICE == "cuda" else torch.float32
	    model = AutoModelForCausalLM.from_pretrained(
	        model_path,
	        torch_dtype=weights_dtype,
	        device_map="auto",
	        trust_remote_code=False,
	    )

	    print("Model and tokenizer loaded successfully!\n")
	    return model, tokenizer


	def generate_response(model, tokenizer, prompt: str, temperature: float = 0.4, max_new_tokens: int = 256):
	    """
	    Generate a response from the model given a user prompt.

	    Args:
	        model: The loaded language model (must expose ``device`` and ``generate``).
	        tokenizer: The tokenizer used to encode the prompt and decode the output.
	        prompt (str): User input string; truncated to 2048 tokens when encoded.
	        temperature (float): Sampling temperature (higher = more random).
	            A value of 0 or below selects greedy (deterministic) decoding
	            instead of sampling.
	        max_new_tokens (int): Maximum number of new tokens to generate.

	    Returns:
	        str: The decoded newly generated text (prompt excluded), stripped of
	        leading and trailing whitespace.
	    """
	    # Encode the prompt and move every tensor to the model's device
	    encoded = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048)
	    inputs = {name: tensor.to(model.device) for name, tensor in encoded.items()}

	    generation_kwargs = {
	        "max_new_tokens": max_new_tokens,
	        "repetition_penalty": 1.1,  # Slight penalty to avoid loops
	        "pad_token_id": tokenizer.pad_token_id,
	        "eos_token_id": tokenizer.eos_token_id,
	    }
	    if temperature > 0:
	        # Temperature and nucleus sampling only apply when sampling is on
	        generation_kwargs.update(do_sample=True, temperature=temperature, top_p=0.95)
	    else:
	        # Sampling requires a strictly positive temperature, so a
	        # non-positive value falls back to greedy decoding instead of
	        # letting generate() raise.
	        generation_kwargs["do_sample"] = False

	    with torch.no_grad():  # Disable gradient calculation for inference
	        output_ids = model.generate(**inputs, **generation_kwargs)

	    # Decode only the newly generated tokens (exclude the input prompt)
	    prompt_length = inputs["input_ids"].shape[1]
	    new_tokens = output_ids[0][prompt_length:]
	    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()


	def interactive_chat(model, tokenizer, temperature: float):
	    """
	    Run an interactive chat loop until the user leaves.

	    The loop ends when the user types 'exit' or 'quit' (case-insensitive),
	    presses Ctrl+C, or when standard input reaches end-of-file (Ctrl+D or
	    exhausted piped input). Empty inputs are skipped. Any other error is
	    reported and the chat continues.

	    Args:
	        model: Loaded language model.
	        tokenizer: Tokenizer for the model.
	        temperature (float): Sampling temperature for generation.
	    """
	    print("=" * 60)
	    print("🤖 Chat with VDrone V2 Mini2 (Temperature = {})".format(temperature))
	    print("Type 'exit', 'quit', or press Ctrl+C to stop the conversation.")
	    print("=" * 60)

	    while True:
	        try:
	            # Get user input
	            user_input = input("\n👤 You: ").strip()

	            # Exit conditions
	            if user_input.lower() in ["exit", "quit"]:
	                print("🤖 Assistant: Goodbye! Have a great day!")
	                break

	            # Skip empty inputs
	            if not user_input:
	                continue

	            # Generate and print model response
	            response = generate_response(model, tokenizer, user_input, temperature=temperature)
	            print(f"🤖 Assistant: {response}")

	        except (KeyboardInterrupt, EOFError):
	            # EOFError must end the loop: once stdin is exhausted every
	            # later input() call raises again, so treating it as a
	            # recoverable error would spin forever.
	            print("\n\n🤖 Assistant: Conversation interrupted. Goodbye!")
	            break
	        except Exception as e:
	            # Best-effort: report the failure and keep the session alive
	            print(f"⚠️ Error occurred: {e}")
	            print("Continuing chat...")


	# Main entry point: load the local model, then start the chat loop.
	if __name__ == "__main__":
	    # Load model and tokenizer from the folder configured in MODEL_PATH
	    model, tokenizer = load_model_and_tokenizer(MODEL_PATH)

	    # Start the interactive chat using the configured TEMPERATURE constant
	    interactive_chat(model, tokenizer, temperature=TEMPERATURE)