# Scholar-Sage / app.py
# Author: TheCodeKat — commit bbffad2 "Add preset configurations for better quality"
"""
Scholar Sage - Improved Language Model Web Interface
Optimized for better text generation quality
"""
import torch
import gradio as gr
from transformers import AutoTokenizer
from model.transformer_explained import TinyTransformerLM
from generation_config import CONFIGS
class TextGenerator:
    """Load a TinyTransformerLM checkpoint and generate text via sampling.

    Holds the model, GPT-2 tokenizer, and torch device. ``generate`` runs an
    autoregressive sampling loop with temperature scaling, top-k filtering,
    nucleus (top-p) filtering, and a CTRL-style repetition penalty.
    """

    def __init__(self, model_path: str = "models/best_model_FIXED.pt"):
        """Load the tokenizer and model weights, moving the model to GPU if available.

        Args:
            model_path: Path to a state-dict checkpoint compatible with the
                TinyTransformerLM hyperparameters hard-coded below.
        """
        print("πŸ”„ Loading model...")
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        # GPT-2 BPE tokenizer; the model's embedding table is sized to match it.
        self.tokenizer = AutoTokenizer.from_pretrained("gpt2")
        # Hyperparameters must match those used at training time for the
        # state dict to load — presumably they do for this checkpoint.
        self.model = TinyTransformerLM(
            vocab_size=self.tokenizer.vocab_size,
            d_model=512, n_layers=6, num_heads=8, d_ff=2048, max_len=512
        )
        self.model.load_state_dict(torch.load(model_path, map_location=self.device))
        self.model.to(self.device)
        self.model.eval()  # inference only — disables dropout etc.
        print(f"βœ… Model loaded on {self.device}")

    def generate(self, prompt: str, max_length: int = 50, temperature: float = 0.7,
                 top_k: int = 40,
                 top_p: float = 0.9, repetition_penalty: float = 1.3,
                 num_return_sequences: int = 1) -> str:
        """Generate text with optimized sampling.

        Args:
            prompt: Seed text; echoed back as the start of the output.
            max_length: Maximum number of NEW tokens to sample per sequence.
            temperature: Softmax temperature (> 0); lower is more deterministic.
            top_k: Keep only the k highest logits before sampling (0 disables).
            top_p: Nucleus-sampling cumulative-probability cutoff (1.0 disables).
            repetition_penalty: Values > 1.0 penalize tokens already present in
                the sequence (CTRL-style: positive logits divided, negative
                logits multiplied).
            num_return_sequences: Number of independent samples to draw.

        Returns:
            The generated text, or multiple samples joined by "---" separators;
            a warning string if the prompt is empty.
        """
        # Improved prompt preprocessing
        if not prompt.strip():
            return "⚠️ Please enter a prompt!"
        # Add context hints for better generation
        enhanced_prompt = prompt.strip()
        outputs = []
        for _ in range(num_return_sequences):
            # Each sample restarts from the raw prompt tokens.
            input_ids = self.tokenizer(enhanced_prompt, return_tensors="pt")["input_ids"].to(self.device)
            with torch.no_grad():
                for step in range(max_length):
                    # NOTE(review): model appears to return (logits, aux) — confirm
                    # against TinyTransformerLM; only logits are used here.
                    logits, _ = self.model(input_ids)
                    # Last position's logits; clone so in-place penalty edits
                    # below don't touch the model's output tensor.
                    next_token_logits = logits[:, -1, :].clone()
                    # Enhanced repetition penalty (CTRL-style): discourage every
                    # token already in the sequence, in the correct direction
                    # for both signs of the logit.
                    if repetition_penalty != 1.0:
                        for token_id in set(input_ids[0].tolist()):
                            if next_token_logits[0, token_id] < 0:
                                next_token_logits[0, token_id] *= repetition_penalty
                            else:
                                next_token_logits[0, token_id] /= repetition_penalty
                    next_token_logits = next_token_logits / temperature
                    # Top-k filtering: mask everything below the k-th largest logit.
                    if top_k > 0:
                        indices_to_remove = next_token_logits < torch.topk(
                            next_token_logits, min(top_k, next_token_logits.size(-1))
                        )[0][..., -1, None]
                        next_token_logits[indices_to_remove] = float('-inf')
                    # Top-p (nucleus) filtering: drop the low-probability tail
                    # whose cumulative mass exceeds top_p.
                    if top_p < 1.0:
                        sorted_logits, sorted_indices = torch.sort(next_token_logits, descending=True)
                        cumulative_probs = torch.cumsum(torch.softmax(sorted_logits, dim=-1), dim=-1)
                        sorted_indices_to_remove = cumulative_probs > top_p
                        # Shift right so the first token crossing the threshold
                        # is kept; always keep the single most likely token.
                        sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
                        sorted_indices_to_remove[..., 0] = 0
                        indices_to_remove = sorted_indices_to_remove.scatter(1, sorted_indices, sorted_indices_to_remove)
                        next_token_logits[indices_to_remove] = float('-inf')
                    probs = torch.softmax(next_token_logits, dim=-1)
                    next_token = torch.multinomial(probs, num_samples=1)
                    input_ids = torch.cat([input_ids, next_token], dim=1)
                    # Better stopping conditions
                    # Hard cap at the model's max_len (512) context window.
                    if input_ids.size(1) >= 512:
                        break
                    if next_token.item() == self.tokenizer.eos_token_id:
                        break
                    # Stop on double newline for cleaner outputs
                    if step > 10 and self.tokenizer.decode(input_ids[0, -2:]) == "\n\n":
                        break
            generated_text = self.tokenizer.decode(input_ids[0], skip_special_tokens=True)
            outputs.append(generated_text)
        return outputs[0] if num_return_sequences == 1 else "\n\n---\n\n".join(outputs)
# Process-wide singleton, loaded once at import time so the model is ready
# before the Gradio UI starts serving requests.
generator = TextGenerator()
def generate_with_preset(prompt, preset, max_length, custom_temp, custom_top_k,
                         custom_top_p, custom_rep_pen, num_outputs):
    """Gradio handler: generate text using a named preset or custom sliders.

    Args:
        prompt: User prompt text.
        preset: Key into CONFIGS ("balanced", "creative", ...) or "custom"
            to use the slider values instead.
        max_length: Maximum number of tokens to generate (raw UI value).
        custom_temp: Temperature slider value (used only when preset == "custom").
        custom_top_k: Top-k slider value (custom preset only).
        custom_top_p: Top-p slider value (custom preset only).
        custom_rep_pen: Repetition-penalty slider value (custom preset only).
        num_outputs: Number of samples to generate.

    Returns:
        The generated text, or a user-facing warning/error string.
    """
    if not prompt.strip():
        return "⚠️ Please enter a prompt!"
    try:
        # FIX: the CONFIGS lookup is inside the try block so an unknown
        # preset key surfaces as a friendly error string instead of an
        # unhandled KeyError crashing the Gradio callback.
        if preset != "custom":
            config = CONFIGS[preset]
            temp = config["temperature"]
            top_k = config["top_k"]
            top_p = config["top_p"]
            rep_pen = config["repetition_penalty"]
        else:
            temp = custom_temp
            top_k = custom_top_k
            top_p = custom_top_p
            rep_pen = custom_rep_pen
        # Coerce UI values explicitly — sliders may deliver floats for
        # integer-valued parameters.
        return generator.generate(
            prompt=prompt,
            max_length=int(max_length),
            temperature=float(temp),
            top_k=int(top_k),
            top_p=float(top_p),
            repetition_penalty=float(rep_pen),
            num_return_sequences=int(num_outputs)
        )
    except Exception as e:
        # Broad catch is deliberate at this UI boundary: report the failure
        # to the user rather than crashing the app.
        return f"❌ Error: {str(e)}"
# Build Gradio Interface
# Runs at import time: constructs the Blocks app and wires the Generate
# button to generate_with_preset; `demo` is launched from the guard below.
with gr.Blocks(title="Scholar Sage - Improved", theme=gr.themes.Soft()) as demo:
    # Header / usage tips.
    gr.Markdown("""
# πŸŽ“ Scholar Sage - Language Model (Optimized)
A 45M parameter transformer trained on WikiText-2. **Use presets** for best results!
πŸ’‘ **Tips for Quality Output:**
- Use **"Balanced" preset** for general use
- Start with encyclopedia-style prompts (model trained on WikiText)
- Try longer prompts (10-20 words) for better context
- Experiment with different presets for different styles
""")
    with gr.Row():
        # Left column: inputs and sampling controls.
        with gr.Column(scale=1):
            prompt_input = gr.Textbox(
                label="πŸ“ Enter Your Prompt",
                placeholder="Example: The theory of relativity is a scientific theory that",
                lines=4
            )
            # Choices must match the keys of CONFIGS plus "custom".
            preset_selector = gr.Radio(
                choices=["balanced", "creative", "focused", "factual", "custom"],
                value="balanced",
                label="🎚️ Preset Configuration",
                info="Balanced is recommended for most uses"
            )
            max_length = gr.Slider(
                minimum=20, maximum=150, value=60, step=10,
                label="πŸ“ Max Length (tokens)"
            )
            num_outputs = gr.Slider(
                minimum=1, maximum=3, value=1, step=1,
                label="πŸ”’ Number of Outputs"
            )
            # Manual sampling controls, consulted only when preset == "custom".
            with gr.Accordion("βš™οΈ Custom Settings", open=False):
                gr.Markdown("*Only used when 'custom' preset is selected*")
                custom_temp = gr.Slider(0.1, 2.0, 0.7, step=0.1, label="Temperature")
                custom_top_k = gr.Slider(0, 100, 40, step=5, label="Top-k")
                custom_top_p = gr.Slider(0.0, 1.0, 0.9, step=0.05, label="Top-p")
                custom_rep_pen = gr.Slider(1.0, 2.0, 1.3, step=0.1, label="Repetition Penalty")
            generate_btn = gr.Button("πŸš€ Generate", variant="primary", size="lg")
        # Right column: output display.
        with gr.Column(scale=1):
            output_text = gr.Textbox(
                label="✨ Generated Text",
                lines=18,
                show_copy_button=True
            )
    # Example prompts optimized for WikiText-2 training
    gr.Markdown("### πŸ’‘ Example Prompts (Optimized for this Model)")
    # Each example row supplies every input component in order:
    # prompt, preset, max_length, temp, top_k, top_p, rep_pen, num_outputs.
    gr.Examples(
        examples=[
            ["The history of artificial intelligence began in", "balanced", 60, 0.7, 40, 0.9, 1.3, 1],
            ["Python programming language is a high-level", "factual", 60, 0.3, 20, 0.8, 1.4, 1],
            ["In the field of quantum mechanics,", "balanced", 60, 0.7, 40, 0.9, 1.3, 1],
            ["The United States is a country located in", "factual", 60, 0.3, 20, 0.8, 1.4, 1],
            ["Machine learning algorithms can be used to", "balanced", 60, 0.7, 40, 0.9, 1.3, 1],
        ],
        inputs=[prompt_input, preset_selector, max_length, custom_temp, custom_top_k,
                custom_top_p, custom_rep_pen, num_outputs],
    )
    # Wire the button: inputs are passed positionally to generate_with_preset.
    generate_btn.click(
        fn=generate_with_preset,
        inputs=[prompt_input, preset_selector, max_length, custom_temp, custom_top_k,
                custom_top_p, custom_rep_pen, num_outputs],
        outputs=output_text
    )
    # Footer: preset descriptions and known model limitations.
    gr.Markdown("""
---
### πŸ“Œ Understanding the Presets
- **Balanced** (default): Best for general encyclopedia-style text
- **Creative**: More diverse outputs, good for storytelling
- **Focused**: Deterministic, good for factual content
- **Factual**: Highest coherence, lowest creativity
- **Custom**: Manual control over all parameters
### ⚠️ Model Limitations
This is a 45M parameter research model (vs GPT-3's 175B). It works best with:
- βœ… Encyclopedia-style content (trained on WikiText-2)
- βœ… Factual, informative text
- βœ… Short to medium generations (20-100 tokens)
It struggles with:
- ❌ Creative fiction or dialogue
- ❌ Very long context understanding
- ❌ Highly specialized technical content
""")

if __name__ == "__main__":
    # Launch the Gradio server only when run as a script (not on import).
    demo.launch()