import os

# Prevent bitsandbytes from attempting GPU ops on CPU-only builds
os.environ["BITSANDBYTES_NOWELCOME"] = "1"
os.environ["DISABLE_BNB_QUANTIZATION_CHECK"] = "1"

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import spaces
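
# Candidate checkpoints, tried in order; the first one that loads successfully is used.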
MODEL_CANDIDATES = [
    "jackyanghxc/PEAllm",
    "Sakjay/Thai-Llama3-8b",
]


def _select_device_and_dtype():
    """Return device map and dtype based on GPU availability."""
    if torch.cuda.is_available():
        return "cuda", torch.float16
    return "cpu", torch.float32


# Cache the loaded tokenizer/model so they are not reloaded on every request.
_MODEL_CACHE = None


def load_model():
    """Load tokenizer and model when a GPU worker spins up."""
    global _MODEL_CACHE
    if _MODEL_CACHE is not None:
        return _MODEL_CACHE
    device_map, dtype = _select_device_and_dtype()
    last_error = None
    for name in MODEL_CANDIDATES:
        try:
            tokenizer = AutoTokenizer.from_pretrained(name)
            model = AutoModelForCausalLM.from_pretrained(
                name,
                torch_dtype=dtype,
                device_map=device_map,
                low_cpu_mem_usage=True,
            )
            model.eval()
            _MODEL_CACHE = (tokenizer, model)
            return _MODEL_CACHE
        except Exception as exc:  # noqa: BLE001
            last_error = exc
    raise RuntimeError(f"Failed to load model: {last_error}")


# spaces.GPU requests a ZeroGPU worker for this call; it is a no-op on non-ZeroGPU hardware.
@spaces.GPU
def generate_response(message, history):
    """Generate a response from PEAllm for a single chat turn."""
    tokenizer, model = load_model()
    try:
        prompt = f"<|begin_of_text|>{message}"
        inputs = tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
            max_length=512,
        )
        if torch.cuda.is_available():
            inputs = {k: v.to(model.device) for k, v in inputs.items()}
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=200,
                temperature=0.7,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
            )
        # Decode only the newly generated tokens so the prompt is not echoed back.
        new_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
        return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
    except Exception as exc:  # noqa: BLE001
        return f"Error: {exc}"
| title = "PEAllm - Thailand Energy AI Assistant" | |
| description = """ | |
| **PEAllm (Provincial Electricity Authority LLM)** – Your AI assistant for Thailand's energy sector. | |
| Ask questions about: | |
| - EGAT (Electricity Generating Authority of Thailand) | |
| - MEA (Metropolitan Electricity Authority) | |
| - PEA (Provincial Electricity Authority) | |
| - EPPO (Energy Policy and Planning Office) | |
| - DEDE (Department of Alternative Energy Development) | |
| - ERC (Energy Regulatory Commission) | |
| - PTT (PTT Public Company Limited) | |
| - Thailand energy policies and statistics | |
| **Languages**: Thai and English | |
| """ | |

examples = [
    ["Tell me about EGAT's power generation capacity"],
    ["MEA ให้ข้อมูลลูกค้าอย่างไร?"],  # "How does MEA provide information to customers?"
    ["What is Thailand's renewable energy target?"],
    ["แผน AEDP คืออะไร?"],  # "What is the AEDP plan?"
    ["How many provinces does PEA serve?"],
    ["การใช้พลังงานขั้นสุดท้ายของไทยเป็นอย่างไร?"],  # "What is Thailand's final energy consumption like?"
]

demo = gr.ChatInterface(
    fn=generate_response,
    title=title,
    description=description,
    examples=examples,
    theme="soft",
)
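
# Light styling tweaks layered on top of the "soft" theme.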
demo.css = """
.gradio-container {
    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
}
.chat-message {
    padding: 10px;
    border-radius: 10px;
}
"""

if __name__ == "__main__":
    demo.launch()