# Import necessary libraries
import os
from threading import Thread
import argparse

import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
from peft import PeftModel
from huggingface_hub import login

from utils import get_device  # Assuming this function exists

# Authenticate using the Hugging Face API token from the environment
hf_api_token = os.getenv("HF_API_TOKEN")
if hf_api_token is None:
    raise ValueError("Hugging Face API token not found in environment variables. Please set it as a secret in Hugging Face Spaces.")
login(token=hf_api_token)
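# Setup sketch (assumption: the variable name HF_API_TOKEN matches the secret
# configured for this Space). On Spaces, add it under Settings -> Secrets;
# for a local run, export it before starting the app:
#   export HF_API_TOKEN=hf_...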
# Create the argument parser
parser = argparse.ArgumentParser(description='Check model usage.')
# Add a boolean switch that disables the LoRA adapter
parser.add_argument('--baseonly', action='store_true',
                    help='A boolean switch to indicate base-only mode')
# Parse the command-line arguments
args = parser.parse_args()
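# Usage sketch (assumption: this file is the Space's app.py):
#   python app.py             # base model + eliAI adapter
#   python app.py --baseonly  # base model only, adapter skipped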
# Define the model and adapter names and the torch data type
model_name = "microsoft/Phi-3-mini-4k-instruct"
adapters_name = "zurd46/eliAI"
torch_dtype = torch.bfloat16  # Set the appropriate torch data type

# Display device and CPU thread information
device = get_device()
print(f"Number of GPUs available: {torch.cuda.device_count()}")
print(f"Running on device: {device}")
print(f"CPU threads: {torch.get_num_threads()}")

# Fail early if no GPU is available
if not torch.cuda.is_available():
    raise RuntimeError("CUDA is not available. Ensure that a GPU is available and properly configured.")
# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load the base model and match its embedding size to the tokenizer
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch_dtype)
model.resize_token_embeddings(len(tokenizer))

# Load the adapter unless base-only mode was requested
usingAdapter = False
if not args.baseonly:
    usingAdapter = True
    model = PeftModel.from_pretrained(model, adapters_name)

model.to(device)
print(f"Model {model_name} loaded successfully on {device}")
# Function to run the text generation process
def run_generation(user_text, top_p, temperature, top_k, max_new_tokens):
    # Wrap the input in the fine-tuning prompt template when the adapter is active
    template = "<|context|><|user|>\n{}<|end|>\n<|assistant|>"
    model_inputs = tokenizer(template.format(user_text) if usingAdapter else user_text, return_tensors="pt")
    model_inputs = model_inputs.to(device)

    # Generate text in a separate thread and stream the tokens back
    streamer = TextIteratorStreamer(tokenizer, timeout=10., skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = dict(
        input_ids=model_inputs['input_ids'],
        attention_mask=model_inputs['attention_mask'],
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        top_p=top_p,
        temperature=float(temperature),
        top_k=top_k,
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=tokenizer.eos_token_id
    )
    t = Thread(target=model.generate, kwargs=generate_kwargs)
    t.start()

    # Collect the streamed text and return the full response
    model_output = ""
    for new_text in streamer:
        model_output += new_text
    t.join()
    return model_output
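# Quick smoke-test sketch (assumption: run after the model has loaded; the Space
# itself drives run_generation through the Gradio UI below, not this call):
#   print(run_generation("Hello, who are you?", top_p=0.95, temperature=0.8,
#                        top_k=50, max_new_tokens=32))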
# Gradio UI setup
with gr.Blocks(css="""
    .form.svelte-sfqy0y {
        background: var(--block-background-fill);
        padding: 20px;
    }
    body {
        font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif;
        color: #e0e0e0;
        margin: 0;
        padding: 0;
        box-sizing: border-box;
    }
    .gradio-container {
        max-width: 900px;
        margin: auto;
        padding: 20px;
        border-radius: 8px;
        box-shadow: 0 0 10px rgba(0,0,0,0.5);
    }
    .gr-button {
        color: white;
        border: none;
        border-radius: 4px;
        padding: 10px 24px;
        cursor: pointer;
    }
    .gr-button:hover {
        background-color: #3700b3;
    }
    .gr-slider input[type=range] {
        -webkit-appearance: none;
        width: 100%;
        height: 8px;
        border-radius: 5px;
        outline: none;
        opacity: 0.9;
        -webkit-transition: .2s;
        transition: opacity .2s;
    }
    .gr-slider input[type=range]:hover {
        opacity: 1;
    }
    .gr-textbox {
        color: white;
        border: none;
        border-radius: 4px;
        padding: 10px;
    }
    .chatbox {
        max-height: 400px;
        overflow-y: auto;
        margin-bottom: 20px;
    }
""") as demo:
    gr.Markdown(
        """
        <div style="text-align: center; padding: 20px;">
            <h1>🌙 eliAI Text Generation Interface</h1>
            <h3>Model: Phi-3-mini-4k-instruct</h3>
            <h4>Developed by Daniel Zurmühle</h4>
        </div>
        """)

    with gr.Row():
        with gr.Column(scale=3):
            user_text = gr.Textbox(placeholder="Enter your question here", label="User Input", lines=3,
                                   elem_classes="gr-textbox")
            button_submit = gr.Button(value="Submit", elem_classes="gr-button")
            max_new_tokens = gr.Slider(minimum=1, maximum=1000, value=1000, step=1, label="Max New Tokens")
            top_p = gr.Slider(minimum=0.05, maximum=1.0, value=0.95, step=0.05, label="Top-p (Nucleus Sampling)")
            top_k = gr.Slider(minimum=1, maximum=50, value=50, step=1, label="Top-k")
            temperature = gr.Slider(minimum=0.1, maximum=5.0, value=0.8, step=0.1, label="Temperature")
        with gr.Column(scale=7):
            model_output = gr.Chatbot(label="Chatbot Output", height=566)

    # Run generation and return a single (user, assistant) chat turn
    def handle_submit(text, top_p, temperature, top_k, max_new_tokens):
        response = run_generation(text, top_p, temperature, top_k, max_new_tokens)
        return [(text, response)]

    # Trigger generation from the button or by submitting the textbox
    button_submit.click(handle_submit, [user_text, top_p, temperature, top_k, max_new_tokens], model_output)
    user_text.submit(handle_submit, [user_text, top_p, temperature, top_k, max_new_tokens], model_output)

demo.queue(max_size=32).launch(server_name="0.0.0.0", server_port=7860)