"""
Gradio Web UI for Vadakayil LLM
Upload this to Hugging Face Spaces to run interactively
"""
import gradio as gr
import torch
import json
import os

# Try to import the local model/tokenizer modules shipped with this file
try:
    from model import TinyLLM
    from tokenizer import Tokenizer
    LOCAL_MODE = True
except ImportError:
    LOCAL_MODE = False
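# Note: when the local modules are missing, LOCAL_MODE stays False and
# load_model() raises a clear error instead of failing with a NameError.
# A possible (untested) fallback sketch, assuming the weights live in the
# model repo linked in the UI below, would fetch them from the Hub:
#
#     from huggingface_hub import hf_hub_download
#     model_path = hf_hub_download(
#         repo_id="mountainrock/vadakayil-llm-tiny", filename="model.pt"
#     )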


def load_model():
    """Load the trained model and tokenizer."""
    if not LOCAL_MODE:
        raise RuntimeError(
            "model.py and tokenizer.py must be importable to load the model."
        )

    # Prefer files in the working directory (the Spaces layout), otherwise
    # fall back to the local training output directory
    if os.path.exists("model.pt"):
        model_path = "model.pt"
        tokenizer_path = "tokenizer.json"
    else:
        model_path = "./output/vadakayil_model/model.pt"
        tokenizer_path = "./output/vadakayil_model/tokenizer.json"

    # Load tokenizer
    tokenizer = Tokenizer.load(tokenizer_path)

    # Load model config
    config_path = (
        "config.json"
        if os.path.exists("config.json")
        else "./output/vadakayil_model/config.json"
    )
    with open(config_path) as f:
        config = json.load(f)

    # Recreate the model with the trained hyperparameters, falling back to
    # the training-time defaults when a key is missing from the config
    model = TinyLLM(
        vocab_size=config.get("vocab_size", 74),
        d_model=config.get("d_model", 128),
        num_heads=config.get("num_heads", 2),
        num_layers=config.get("num_layers", 2),
        d_ff=config.get("d_ff", 256),
        max_seq_len=config.get("max_seq_len", 512),
        dropout=0.1,
        pad_token_id=0,
    )

    # Load weights (weights_only=False runs the full pickle loader, so only
    # use it with checkpoints you trust)
    checkpoint = torch.load(model_path, map_location="cpu", weights_only=False)
    model.load_state_dict(checkpoint["model_state_dict"])
    model.eval()
    return model, tokenizer


def generate_text(prompt, max_tokens, temperature, top_k):
    """Generate text from a prompt."""
    # Lazy-load the model on the first request and cache it on the function
    # object so later requests reuse it
    if not hasattr(generate_text, "model"):
        generate_text.model, generate_text.tokenizer = load_model()
    model = generate_text.model
    tokenizer = generate_text.tokenizer

    # Guard against an empty prompt, which would produce an empty input tensor
    if not prompt or not prompt.strip():
        return ""

    # Encode prompt
    input_ids = tokenizer.encode(prompt, add_special_tokens=False)
    input_ids = torch.tensor([input_ids], dtype=torch.long)

    # Stop generation at the EOS token if the tokenizer defines one
    eos_token_id = tokenizer.token_to_id.get(tokenizer.eos_token, None)

    # Generate
    with torch.no_grad():
        output_ids = model.generate(
            input_ids,
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_k=top_k if top_k > 0 else None,  # slider value 0 disables top-k
            eos_token_id=eos_token_id,
        )

    # Decode, dropping special tokens
    generated_text = tokenizer.decode(output_ids[0].tolist(), skip_special_tokens=True)
    return generated_text
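

# Hypothetical smoke test (not part of the Space's runtime path); assumes the
# model and tokenizer files are present locally:
#
#     print(generate_text("What is Mach 0.3?", max_tokens=50,
#                         temperature=0.8, top_k=50))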

# Create the Gradio interface
with gr.Blocks(title="Vadakayil LLM", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # 🧘 Vadakayil LLM

    A tiny character-level LLM trained on Capt Ajit Vadakayil's writings about:
    - Mach 0.3 and fluid dynamics
    - Consciousness and Vedic philosophy
    - Silent Kalki Revolution
    - Evidence and Witness

    **Model**: [mountainrock/vadakayil-llm-tiny](https://huggingface.co/mountainrock/vadakayil-llm-tiny)
    """)

    with gr.Row():
        with gr.Column():
            prompt_input = gr.Textbox(
                label="Enter your question",
                placeholder="What is Mach 0.3?",
                lines=3,
            )
            with gr.Accordion("Advanced Settings", open=False):
                max_tokens = gr.Slider(50, 300, value=150, step=10, label="Max Tokens")
                temperature = gr.Slider(0.1, 2.0, value=0.8, step=0.1, label="Temperature")
                top_k = gr.Slider(0, 100, value=50, step=5, label="Top-K (0 = disabled)")
            generate_btn = gr.Button("Generate", variant="primary")
        with gr.Column():
            output_text = gr.Textbox(label="Generated Answer", lines=5)

    # Example prompts
    gr.Examples(
        examples=[
            "What is Mach 0.3 and why is it significant?",
            "Why is Mach 0.3 called the Paradox Rekha?",
            "What is the Silent Kalki Revolution of Consciousness?",
            "What does the movie Thondi Muthalum Driksakshiyum represent?",
            "What is the paradox of holding on versus letting go?",
        ],
        inputs=prompt_input,
    )

    generate_btn.click(
        fn=generate_text,
        inputs=[prompt_input, max_tokens, temperature, top_k],
        outputs=output_text,
    )

if __name__ == "__main__":
    demo.launch()
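
# On Spaces, launch() with no arguments is enough; when running locally,
# demo.launch(share=True) is a standard Gradio option for getting a
# temporary public link.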