TheCodeKat committed on
Commit
bbffad2
·
1 Parent(s): fc8f2d9

Add preset configurations for better quality

Browse files
Files changed (2) hide show
  1. app.py +121 -175
  2. generation_config.py +40 -0
app.py CHANGED
@@ -1,153 +1,127 @@
1
  """
2
- Scholar Sage - Language Model Web Interface
3
- Interactive text generation using the trained Transformer model
4
  """
5
 
6
  import torch
7
  import gradio as gr
8
  from transformers import AutoTokenizer
9
  from model.transformer_explained import TinyTransformerLM
 
10
 
11
 
12
  class TextGenerator:
13
  def __init__(self, model_path="models/best_model_FIXED.pt"):
14
- """Initialize the text generator with the trained model."""
15
  print("πŸ”„ Loading model...")
16
  self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
17
-
18
- # Load tokenizer
19
  self.tokenizer = AutoTokenizer.from_pretrained("gpt2")
20
- vocab_size = self.tokenizer.vocab_size
21
 
22
- # Create model with same architecture as training
23
  self.model = TinyTransformerLM(
24
- vocab_size=vocab_size,
25
- d_model=512,
26
- n_layers=6,
27
- num_heads=8,
28
- d_ff=2048,
29
- max_len=512
30
  )
31
-
32
- # Load trained weights
33
  self.model.load_state_dict(torch.load(model_path, map_location=self.device))
34
  self.model.to(self.device)
35
  self.model.eval()
36
 
37
- total_params = sum(p.numel() for p in self.model.parameters())
38
- print(f"βœ… Model loaded! ({total_params:,} parameters)")
39
- print(f"πŸ–₯️ Device: {self.device}")
40
 
41
- def generate(
42
- self,
43
- prompt,
44
- max_length=50,
45
- temperature=0.8,
46
- top_k=40,
47
- top_p=0.92,
48
- repetition_penalty=1.2,
49
- num_return_sequences=1
50
- ):
51
- """
52
- Generate text based on the prompt with advanced sampling.
53
 
54
- Args:
55
- prompt: Input text to start generation
56
- max_length: Maximum number of tokens to generate
57
- temperature: Sampling temperature (higher = more random)
58
- top_k: Top-k sampling parameter
59
- top_p: Top-p (nucleus) sampling parameter
60
- repetition_penalty: Penalty for repeating tokens (>1.0 discourages repetition)
61
- num_return_sequences: Number of different outputs to generate
62
- """
63
  if not prompt.strip():
64
  return "⚠️ Please enter a prompt!"
65
 
66
- outputs = []
 
67
 
 
68
  for _ in range(num_return_sequences):
69
- # Tokenize input
70
- input_ids = self.tokenizer(prompt, return_tensors="pt")["input_ids"].to(self.device)
71
- original_length = input_ids.size(1)
72
 
73
  with torch.no_grad():
74
  for step in range(max_length):
75
- # Get logits
76
  logits, _ = self.model(input_ids)
77
  next_token_logits = logits[:, -1, :].clone()
78
 
79
- # Apply repetition penalty
80
  if repetition_penalty != 1.0:
81
  for token_id in set(input_ids[0].tolist()):
82
- # If score < 0, multiply by penalty (make it more negative)
83
- # If score > 0, divide by penalty (make it smaller)
84
  if next_token_logits[0, token_id] < 0:
85
  next_token_logits[0, token_id] *= repetition_penalty
86
  else:
87
  next_token_logits[0, token_id] /= repetition_penalty
88
 
89
- # Apply temperature
90
  next_token_logits = next_token_logits / temperature
91
 
92
- # Apply top-k filtering
93
  if top_k > 0:
94
- indices_to_remove = next_token_logits < torch.topk(next_token_logits, min(top_k, next_token_logits.size(-1)))[0][..., -1, None]
 
 
95
  next_token_logits[indices_to_remove] = float('-inf')
96
 
97
- # Apply top-p (nucleus) filtering
98
  if top_p < 1.0:
99
  sorted_logits, sorted_indices = torch.sort(next_token_logits, descending=True)
100
  cumulative_probs = torch.cumsum(torch.softmax(sorted_logits, dim=-1), dim=-1)
101
-
102
- # Remove tokens with cumulative probability above the threshold
103
  sorted_indices_to_remove = cumulative_probs > top_p
104
  sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
105
  sorted_indices_to_remove[..., 0] = 0
106
-
107
  indices_to_remove = sorted_indices_to_remove.scatter(1, sorted_indices, sorted_indices_to_remove)
108
  next_token_logits[indices_to_remove] = float('-inf')
109
 
110
- # Sample from the filtered distribution
111
  probs = torch.softmax(next_token_logits, dim=-1)
112
  next_token = torch.multinomial(probs, num_samples=1)
113
-
114
- # Append to sequence
115
  input_ids = torch.cat([input_ids, next_token], dim=1)
116
 
117
- # Early stopping conditions
118
- # Stop if we hit the model's max length
119
  if input_ids.size(1) >= 512:
120
  break
121
-
122
- # Stop if we generate end-of-sequence token
123
  if next_token.item() == self.tokenizer.eos_token_id:
124
  break
 
 
 
125
 
126
- # Decode the generated sequence
127
  generated_text = self.tokenizer.decode(input_ids[0], skip_special_tokens=True)
128
  outputs.append(generated_text)
129
 
130
- # Return single output or multiple outputs separated
131
- if num_return_sequences == 1:
132
- return outputs[0]
133
- else:
134
- return "\n\n" + "="*70 + "\n\n".join(outputs)
135
 
136
 
137
- # Initialize generator
138
  generator = TextGenerator()
139
 
140
 
141
- def generate_text(prompt, max_length, temperature, top_k, top_p, repetition_penalty, num_outputs):
142
- """Wrapper function for Gradio interface."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
143
  try:
144
  result = generator.generate(
145
  prompt=prompt,
146
  max_length=int(max_length),
147
- temperature=float(temperature),
148
  top_k=int(top_k),
149
  top_p=float(top_p),
150
- repetition_penalty=float(repetition_penalty),
151
  num_return_sequences=int(num_outputs)
152
  )
153
  return result
@@ -155,133 +129,105 @@ def generate_text(prompt, max_length, temperature, top_k, top_p, repetition_pena
155
  return f"❌ Error: {str(e)}"
156
 
157
 
158
- # Create Gradio interface
159
- with gr.Blocks(title="Scholar Sage - Language Model", theme=gr.themes.Soft()) as demo:
160
- gr.Markdown(
161
- """
162
- # πŸŽ“ Scholar Sage - Language Model
163
-
164
- A transformer-based language model trained on WikiText-2 with **causal masking**.
165
-
166
- **Model Details:**
167
- - 45M parameters (6 layers, 512 hidden dim, 8 attention heads)
168
- - Trained with proper causal attention masking
169
- - Best model from epoch 3/5
170
-
171
- ⚠️ **Note**: This is a small research model (~45M params vs GPT-3's 175B). For best results:
172
- - Use **Repetition Penalty = 1.2-1.5** to prevent repetitive text
173
- - Keep prompts clear and specific
174
- - Expect limited context understanding compared to large commercial models
175
- """
176
- )
177
 
178
  with gr.Row():
179
  with gr.Column(scale=1):
180
  prompt_input = gr.Textbox(
181
- label="πŸ“ Enter your prompt",
182
- placeholder="Start typing... (e.g., 'Machine learning is')",
183
- lines=3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
184
  )
185
 
186
- with gr.Accordion("βš™οΈ Advanced Settings", open=False):
187
- max_length = gr.Slider(
188
- minimum=10,
189
- maximum=200,
190
- value=50,
191
- step=10,
192
- label="Max Length (tokens to generate)"
193
- )
194
-
195
- temperature = gr.Slider(
196
- minimum=0.1,
197
- maximum=2.0,
198
- value=0.8,
199
- step=0.1,
200
- label="Temperature (higher = more random)"
201
- )
202
-
203
- top_k = gr.Slider(
204
- minimum=0,
205
- maximum=100,
206
- value=40,
207
- step=5,
208
- label="Top-k (0 = disabled)"
209
- )
210
-
211
- top_p = gr.Slider(
212
- minimum=0.0,
213
- maximum=1.0,
214
- value=0.92,
215
- step=0.02,
216
- label="Top-p / Nucleus Sampling"
217
- )
218
-
219
- repetition_penalty = gr.Slider(
220
- minimum=1.0,
221
- maximum=2.0,
222
- value=1.2,
223
- step=0.1,
224
- label="Repetition Penalty (higher = less repetition)"
225
- )
226
-
227
- num_outputs = gr.Slider(
228
- minimum=1,
229
- maximum=3,
230
- value=1,
231
- step=1,
232
- label="Number of outputs"
233
- )
234
 
235
  generate_btn = gr.Button("πŸš€ Generate", variant="primary", size="lg")
236
 
237
  with gr.Column(scale=1):
238
  output_text = gr.Textbox(
239
  label="✨ Generated Text",
240
- lines=15,
241
  show_copy_button=True
242
  )
243
 
244
- # Examples
245
- gr.Markdown("### πŸ’‘ Example Prompts")
246
  gr.Examples(
247
  examples=[
248
- ["Machine learning is", 50, 0.8, 40, 0.92, 1.2, 1],
249
- ["The future of artificial intelligence", 50, 0.8, 40, 0.92, 1.2, 1],
250
- ["Natural language processing", 50, 0.8, 40, 0.92, 1.2, 1],
251
- ["In the field of computer science", 50, 0.8, 40, 0.92, 1.2, 1],
252
- ["Researchers have discovered that", 50, 0.8, 40, 0.92, 1.2, 1],
253
  ],
254
- inputs=[prompt_input, max_length, temperature, top_k, top_p, repetition_penalty, num_outputs],
255
- outputs=output_text,
256
- fn=generate_text,
257
- cache_examples=False
258
  )
259
 
260
- # Connect the button
261
  generate_btn.click(
262
- fn=generate_text,
263
- inputs=[prompt_input, max_length, temperature, top_k, top_p, repetition_penalty, num_outputs],
 
264
  outputs=output_text
265
  )
266
 
267
- gr.Markdown(
268
- """
269
- ---
270
- **Tips for Better Generation:**
271
- - 🌑️ **Temperature**: Lower (0.5-0.7) = more focused, Higher (1.0-1.5) = more creative
272
- - 🎯 **Top-k**: Limits vocabulary to top k most likely tokens (try 30-50)
273
- - πŸ”¬ **Top-p**: Nucleus sampling - keeps smallest set of tokens with cumulative probability > p (try 0.9-0.95)
274
- - πŸ” **Repetition Penalty**: Higher values (1.2-1.5) reduce repetition (IMPORTANT for this model!)
275
-
276
- **For best results**: Use temperature=0.8, top-k=40, top-p=0.92, repetition_penalty=1.2-1.5
277
- """
278
- )
 
 
 
 
 
 
 
 
 
 
279
 
280
 
281
  if __name__ == "__main__":
282
- demo.launch(
283
- server_name="0.0.0.0",
284
- server_port=7860,
285
- share=False,
286
- show_error=True
287
- )
 
1
  """
2
+ Scholar Sage - Improved Language Model Web Interface
3
+ Optimized for better text generation quality
4
  """
5
 
6
  import torch
7
  import gradio as gr
8
  from transformers import AutoTokenizer
9
  from model.transformer_explained import TinyTransformerLM
10
+ from generation_config import CONFIGS
11
 
12
 
13
  class TextGenerator:
14
  def __init__(self, model_path="models/best_model_FIXED.pt"):
 
15
  print("πŸ”„ Loading model...")
16
  self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
 
17
  self.tokenizer = AutoTokenizer.from_pretrained("gpt2")
 
18
 
 
19
  self.model = TinyTransformerLM(
20
+ vocab_size=self.tokenizer.vocab_size,
21
+ d_model=512, n_layers=6, num_heads=8, d_ff=2048, max_len=512
 
 
 
 
22
  )
 
 
23
  self.model.load_state_dict(torch.load(model_path, map_location=self.device))
24
  self.model.to(self.device)
25
  self.model.eval()
26
 
27
+ print(f"βœ… Model loaded on {self.device}")
 
 
28
 
29
+ def generate(self, prompt, max_length=50, temperature=0.7, top_k=40,
30
+ top_p=0.9, repetition_penalty=1.3, num_return_sequences=1):
31
+ """Generate text with optimized sampling."""
 
 
 
 
 
 
 
 
 
32
 
33
+ # Improved prompt preprocessing
 
 
 
 
 
 
 
 
34
  if not prompt.strip():
35
  return "⚠️ Please enter a prompt!"
36
 
37
+ # Add context hints for better generation
38
+ enhanced_prompt = prompt.strip()
39
 
40
+ outputs = []
41
  for _ in range(num_return_sequences):
42
+ input_ids = self.tokenizer(enhanced_prompt, return_tensors="pt")["input_ids"].to(self.device)
 
 
43
 
44
  with torch.no_grad():
45
  for step in range(max_length):
 
46
  logits, _ = self.model(input_ids)
47
  next_token_logits = logits[:, -1, :].clone()
48
 
49
+ # Enhanced repetition penalty
50
  if repetition_penalty != 1.0:
51
  for token_id in set(input_ids[0].tolist()):
 
 
52
  if next_token_logits[0, token_id] < 0:
53
  next_token_logits[0, token_id] *= repetition_penalty
54
  else:
55
  next_token_logits[0, token_id] /= repetition_penalty
56
 
 
57
  next_token_logits = next_token_logits / temperature
58
 
59
+ # Top-k filtering
60
  if top_k > 0:
61
+ indices_to_remove = next_token_logits < torch.topk(
62
+ next_token_logits, min(top_k, next_token_logits.size(-1))
63
+ )[0][..., -1, None]
64
  next_token_logits[indices_to_remove] = float('-inf')
65
 
66
+ # Top-p filtering
67
  if top_p < 1.0:
68
  sorted_logits, sorted_indices = torch.sort(next_token_logits, descending=True)
69
  cumulative_probs = torch.cumsum(torch.softmax(sorted_logits, dim=-1), dim=-1)
 
 
70
  sorted_indices_to_remove = cumulative_probs > top_p
71
  sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
72
  sorted_indices_to_remove[..., 0] = 0
 
73
  indices_to_remove = sorted_indices_to_remove.scatter(1, sorted_indices, sorted_indices_to_remove)
74
  next_token_logits[indices_to_remove] = float('-inf')
75
 
 
76
  probs = torch.softmax(next_token_logits, dim=-1)
77
  next_token = torch.multinomial(probs, num_samples=1)
 
 
78
  input_ids = torch.cat([input_ids, next_token], dim=1)
79
 
80
+ # Better stopping conditions
 
81
  if input_ids.size(1) >= 512:
82
  break
 
 
83
  if next_token.item() == self.tokenizer.eos_token_id:
84
  break
85
+ # Stop on double newline for cleaner outputs
86
+ if step > 10 and self.tokenizer.decode(input_ids[0, -2:]) == "\n\n":
87
+ break
88
 
 
89
  generated_text = self.tokenizer.decode(input_ids[0], skip_special_tokens=True)
90
  outputs.append(generated_text)
91
 
92
+ return outputs[0] if num_return_sequences == 1 else "\n\n---\n\n".join(outputs)
 
 
 
 
93
 
94
 
 
95
  generator = TextGenerator()
96
 
97
 
98
+ def generate_with_preset(prompt, preset, max_length, custom_temp, custom_top_k,
99
+ custom_top_p, custom_rep_pen, num_outputs):
100
+ """Generate using preset or custom parameters."""
101
+ if not prompt.strip():
102
+ return "⚠️ Please enter a prompt!"
103
+
104
+ # Use preset if selected, otherwise use custom values
105
+ if preset != "custom":
106
+ config = CONFIGS[preset]
107
+ temp = config["temperature"]
108
+ top_k = config["top_k"]
109
+ top_p = config["top_p"]
110
+ rep_pen = config["repetition_penalty"]
111
+ else:
112
+ temp = custom_temp
113
+ top_k = custom_top_k
114
+ top_p = custom_top_p
115
+ rep_pen = custom_rep_pen
116
+
117
  try:
118
  result = generator.generate(
119
  prompt=prompt,
120
  max_length=int(max_length),
121
+ temperature=float(temp),
122
  top_k=int(top_k),
123
  top_p=float(top_p),
124
+ repetition_penalty=float(rep_pen),
125
  num_return_sequences=int(num_outputs)
126
  )
127
  return result
 
129
  return f"❌ Error: {str(e)}"
130
 
131
 
132
+ # Build Gradio Interface
133
+ with gr.Blocks(title="Scholar Sage - Improved", theme=gr.themes.Soft()) as demo:
134
+ gr.Markdown("""
135
+ # πŸŽ“ Scholar Sage - Language Model (Optimized)
136
+
137
+ A 45M parameter transformer trained on WikiText-2. **Use presets** for best results!
138
+
139
+ πŸ’‘ **Tips for Quality Output:**
140
+ - Use **"Balanced" preset** for general use
141
+ - Start with encyclopedia-style prompts (model trained on WikiText)
142
+ - Try longer prompts (10-20 words) for better context
143
+ - Experiment with different presets for different styles
144
+ """)
 
 
 
 
 
 
145
 
146
  with gr.Row():
147
  with gr.Column(scale=1):
148
  prompt_input = gr.Textbox(
149
+ label="πŸ“ Enter Your Prompt",
150
+ placeholder="Example: The theory of relativity is a scientific theory that",
151
+ lines=4
152
+ )
153
+
154
+ preset_selector = gr.Radio(
155
+ choices=["balanced", "creative", "focused", "factual", "custom"],
156
+ value="balanced",
157
+ label="🎚️ Preset Configuration",
158
+ info="Balanced is recommended for most uses"
159
+ )
160
+
161
+ max_length = gr.Slider(
162
+ minimum=20, maximum=150, value=60, step=10,
163
+ label="πŸ“ Max Length (tokens)"
164
+ )
165
+
166
+ num_outputs = gr.Slider(
167
+ minimum=1, maximum=3, value=1, step=1,
168
+ label="πŸ”’ Number of Outputs"
169
  )
170
 
171
+ with gr.Accordion("βš™οΈ Custom Settings", open=False):
172
+ gr.Markdown("*Only used when 'custom' preset is selected*")
173
+ custom_temp = gr.Slider(0.1, 2.0, 0.7, step=0.1, label="Temperature")
174
+ custom_top_k = gr.Slider(0, 100, 40, step=5, label="Top-k")
175
+ custom_top_p = gr.Slider(0.0, 1.0, 0.9, step=0.05, label="Top-p")
176
+ custom_rep_pen = gr.Slider(1.0, 2.0, 1.3, step=0.1, label="Repetition Penalty")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
177
 
178
  generate_btn = gr.Button("πŸš€ Generate", variant="primary", size="lg")
179
 
180
  with gr.Column(scale=1):
181
  output_text = gr.Textbox(
182
  label="✨ Generated Text",
183
+ lines=18,
184
  show_copy_button=True
185
  )
186
 
187
+ # Example prompts optimized for WikiText-2 training
188
+ gr.Markdown("### πŸ’‘ Example Prompts (Optimized for this Model)")
189
  gr.Examples(
190
  examples=[
191
+ ["The history of artificial intelligence began in", "balanced", 60, 0.7, 40, 0.9, 1.3, 1],
192
+ ["Python programming language is a high-level", "factual", 60, 0.3, 20, 0.8, 1.4, 1],
193
+ ["In the field of quantum mechanics,", "balanced", 60, 0.7, 40, 0.9, 1.3, 1],
194
+ ["The United States is a country located in", "factual", 60, 0.3, 20, 0.8, 1.4, 1],
195
+ ["Machine learning algorithms can be used to", "balanced", 60, 0.7, 40, 0.9, 1.3, 1],
196
  ],
197
+ inputs=[prompt_input, preset_selector, max_length, custom_temp, custom_top_k,
198
+ custom_top_p, custom_rep_pen, num_outputs],
 
 
199
  )
200
 
 
201
  generate_btn.click(
202
+ fn=generate_with_preset,
203
+ inputs=[prompt_input, preset_selector, max_length, custom_temp, custom_top_k,
204
+ custom_top_p, custom_rep_pen, num_outputs],
205
  outputs=output_text
206
  )
207
 
208
+ gr.Markdown("""
209
+ ---
210
+ ### πŸ“Œ Understanding the Presets
211
+
212
+ - **Balanced** (default): Best for general encyclopedia-style text
213
+ - **Creative**: More diverse outputs, good for storytelling
214
+ - **Focused**: Deterministic, good for factual content
215
+ - **Factual**: Highest coherence, lowest creativity
216
+ - **Custom**: Manual control over all parameters
217
+
218
+ ### ⚠️ Model Limitations
219
+
220
+ This is a 45M parameter research model (vs GPT-3's 175B). It works best with:
221
+ - βœ… Encyclopedia-style content (trained on WikiText-2)
222
+ - βœ… Factual, informative text
223
+ - βœ… Short to medium generations (20-100 tokens)
224
+
225
+ It struggles with:
226
+ - ❌ Creative fiction or dialogue
227
+ - ❌ Very long context understanding
228
+ - ❌ Highly specialized technical content
229
+ """)
230
 
231
 
232
  if __name__ == "__main__":
233
+ demo.launch()
 
 
 
 
 
generation_config.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Optimized Generation Configurations for Different Use Cases
2
+
3
+ CONFIGS = {
4
+ "creative": {
5
+ "temperature": 0.9,
6
+ "top_k": 50,
7
+ "top_p": 0.95,
8
+ "repetition_penalty": 1.1,
9
+ "description": "More creative and diverse outputs"
10
+ },
11
+ "balanced": {
12
+ "temperature": 0.7,
13
+ "top_k": 40,
14
+ "top_p": 0.9,
15
+ "repetition_penalty": 1.3,
16
+ "description": "Balanced creativity and coherence (recommended)"
17
+ },
18
+ "focused": {
19
+ "temperature": 0.5,
20
+ "top_k": 30,
21
+ "top_p": 0.85,
22
+ "repetition_penalty": 1.5,
23
+ "description": "More focused and deterministic"
24
+ },
25
+ "factual": {
26
+ "temperature": 0.3,
27
+ "top_k": 20,
28
+ "top_p": 0.8,
29
+ "repetition_penalty": 1.4,
30
+ "description": "Best for encyclopedia-style content"
31
+ }
32
+ }
33
+
34
+ # Better prompts for small models
35
+ PROMPT_TEMPLATES = {
36
+ "article": "Wikipedia article about {topic}:\n\n",
37
+ "definition": "{term} is defined as",
38
+ "explanation": "Here is an explanation of {topic}:\n\n",
39
+ "continuation": "{text}"
40
+ }