TheCodeKat committed on
Commit
94aff5c
·
verified ·
1 Parent(s): 841149c

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +287 -0
  2. requirements.txt +11 -0
app.py ADDED
@@ -0,0 +1,287 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Scholar Sage - Language Model Web Interface
3
+ Interactive text generation using the trained Transformer model
4
+ """
5
+
6
+ import torch
7
+ import gradio as gr
8
+ from transformers import AutoTokenizer
9
+ from model.transformer_explained import TinyTransformerLM
10
+
11
+
12
class TextGenerator:
    """Wraps the trained TinyTransformerLM for interactive text generation.

    The GPT-2 tokenizer and the trained checkpoint are loaded once at
    construction; `generate` then performs autoregressive sampling with
    temperature, top-k, top-p (nucleus) and repetition-penalty controls.
    """

    # Context window size; must match the max_len used during training.
    MAX_CONTEXT = 512

    def __init__(self, model_path="models/best_model_FIXED.pt"):
        """Initialize the text generator with the trained model.

        Args:
            model_path: Path to the checkpoint file holding the state_dict.
        """
        print("🔄 Loading model...")
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # Load tokenizer (GPT-2 BPE — the vocabulary the model was trained on)
        self.tokenizer = AutoTokenizer.from_pretrained("gpt2")
        vocab_size = self.tokenizer.vocab_size

        # Create model with the same architecture as training
        self.model = TinyTransformerLM(
            vocab_size=vocab_size,
            d_model=512,
            n_layers=6,
            num_heads=8,
            d_ff=2048,
            max_len=self.MAX_CONTEXT
        )

        # Load trained weights
        self.model.load_state_dict(torch.load(model_path, map_location=self.device))
        self.model.to(self.device)
        self.model.eval()

        total_params = sum(p.numel() for p in self.model.parameters())
        print(f"✅ Model loaded! ({total_params:,} parameters)")
        print(f"🖥️ Device: {self.device}")

    def generate(
        self,
        prompt,
        max_length=50,
        temperature=0.8,
        top_k=40,
        top_p=0.92,
        repetition_penalty=1.2,
        num_return_sequences=1
    ):
        """
        Generate text based on the prompt with advanced sampling.

        Args:
            prompt: Input text to start generation
            max_length: Maximum number of tokens to generate
            temperature: Sampling temperature (higher = more random)
            top_k: Top-k sampling parameter (0 disables it)
            top_p: Top-p (nucleus) sampling parameter (1.0 disables it)
            repetition_penalty: Penalty for repeating tokens (>1.0 discourages repetition)
            num_return_sequences: Number of different outputs to generate

        Returns:
            The generated text (prompt included), or — when
            num_return_sequences > 1 — all outputs joined by a "=" divider.
        """
        if not prompt.strip():
            return "⚠️ Please enter a prompt!"

        outputs = []

        for _ in range(num_return_sequences):
            # Tokenize input
            input_ids = self.tokenizer(prompt, return_tensors="pt")["input_ids"].to(self.device)
            # Robustness: if the prompt alone already fills the context
            # window, keep only the most recent tokens so the forward pass
            # (and the position embeddings) stay within max_len.
            if input_ids.size(1) >= self.MAX_CONTEXT:
                input_ids = input_ids[:, -(self.MAX_CONTEXT - 1):]

            with torch.no_grad():
                for _ in range(max_length):
                    # Logits for the next token come from the last position
                    logits, _ = self.model(input_ids)
                    next_token_logits = logits[:, -1, :].clone()

                    # Apply repetition penalty (vectorized): negative scores
                    # are multiplied (pushed further down), positive scores
                    # divided (shrunk), for every token already generated.
                    if repetition_penalty != 1.0:
                        seen = torch.unique(input_ids[0])
                        scores = next_token_logits[0, seen]
                        next_token_logits[0, seen] = torch.where(
                            scores < 0,
                            scores * repetition_penalty,
                            scores / repetition_penalty,
                        )

                    # Apply temperature (guard against division by zero)
                    next_token_logits = next_token_logits / max(temperature, 1e-6)

                    # Apply top-k filtering: mask everything below the k-th logit
                    if top_k > 0:
                        kth = torch.topk(next_token_logits, min(top_k, next_token_logits.size(-1)))[0][..., -1, None]
                        next_token_logits[next_token_logits < kth] = float('-inf')

                    # Apply top-p (nucleus) filtering
                    if top_p < 1.0:
                        sorted_logits, sorted_indices = torch.sort(next_token_logits, descending=True)
                        cumulative_probs = torch.cumsum(torch.softmax(sorted_logits, dim=-1), dim=-1)

                        # Remove tokens with cumulative probability above the
                        # threshold; the shift keeps at least the top token.
                        sorted_indices_to_remove = cumulative_probs > top_p
                        sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
                        sorted_indices_to_remove[..., 0] = 0

                        indices_to_remove = sorted_indices_to_remove.scatter(1, sorted_indices, sorted_indices_to_remove)
                        next_token_logits[indices_to_remove] = float('-inf')

                    # Sample from the filtered distribution
                    probs = torch.softmax(next_token_logits, dim=-1)
                    next_token = torch.multinomial(probs, num_samples=1)

                    # Append to sequence
                    input_ids = torch.cat([input_ids, next_token], dim=1)

                    # Stop if we hit the model's max context length
                    if input_ids.size(1) >= self.MAX_CONTEXT:
                        break

                    # Stop if we generate the end-of-sequence token
                    if next_token.item() == self.tokenizer.eos_token_id:
                        break

            # Decode the generated sequence
            generated_text = self.tokenizer.decode(input_ids[0], skip_special_tokens=True)
            outputs.append(generated_text)

        if num_return_sequences == 1:
            return outputs[0]
        # BUG FIX: the divider must be the *separator* passed to join().
        # The old expression '"\n\n" + "="*70 + "\n\n".join(outputs)' bound
        # .join to only the last "\n\n", prepending a stray divider and
        # separating the outputs with bare blank lines.
        separator = "\n\n" + "=" * 70 + "\n\n"
        return separator.join(outputs)
135
+
136
+
137
# Initialize generator
# NOTE: the checkpoint is loaded eagerly at import time so the app is ready
# before Gradio starts serving; this raises (e.g. FileNotFoundError) here if
# models/best_model_FIXED.pt is missing rather than on the first request.
generator = TextGenerator()
139
+
140
+
141
def generate_text(prompt, max_length, temperature, top_k, top_p, repetition_penalty, num_outputs):
    """Gradio callback: coerce slider values to their proper types and
    delegate to the shared `generator`.

    Any exception is converted into a user-visible error string so the UI
    never shows a raw traceback.
    """
    try:
        # Sliders may hand back floats for integer-valued controls; cast
        # everything explicitly before calling into the model.
        sampling_kwargs = {
            "prompt": prompt,
            "max_length": int(max_length),
            "temperature": float(temperature),
            "top_k": int(top_k),
            "top_p": float(top_p),
            "repetition_penalty": float(repetition_penalty),
            "num_return_sequences": int(num_outputs),
        }
        return generator.generate(**sampling_kwargs)
    except Exception as e:
        return f"❌ Error: {str(e)}"
156
+
157
+
158
# Create Gradio interface
# Two-column layout: prompt + sampling controls on the left, generated text
# on the right, followed by clickable example prompts and usage tips.
with gr.Blocks(title="Scholar Sage - Language Model", theme=gr.themes.Soft()) as demo:
    # Header / model card shown at the top of the page
    gr.Markdown(
        """
        # 🎓 Scholar Sage - Language Model

        A transformer-based language model trained on WikiText-2 with **causal masking**.

        **Model Details:**
        - 45M parameters (6 layers, 512 hidden dim, 8 attention heads)
        - Trained with proper causal attention masking
        - Best model from epoch 3/5

        ⚠️ **Note**: This is a small research model (~45M params vs GPT-3's 175B). For best results:
        - Use **Repetition Penalty = 1.2-1.5** to prevent repetitive text
        - Keep prompts clear and specific
        - Expect limited context understanding compared to large commercial models
        """
    )

    with gr.Row():
        # Left column: prompt box, sampling controls, generate button
        with gr.Column(scale=1):
            prompt_input = gr.Textbox(
                label="📝 Enter your prompt",
                placeholder="Start typing... (e.g., 'Machine learning is')",
                lines=3
            )

            # Sampling hyper-parameters, collapsed by default
            with gr.Accordion("⚙️ Advanced Settings", open=False):
                max_length = gr.Slider(
                    minimum=10,
                    maximum=200,
                    value=50,
                    step=10,
                    label="Max Length (tokens to generate)"
                )

                temperature = gr.Slider(
                    minimum=0.1,
                    maximum=2.0,
                    value=0.8,
                    step=0.1,
                    label="Temperature (higher = more random)"
                )

                top_k = gr.Slider(
                    minimum=0,
                    maximum=100,
                    value=40,
                    step=5,
                    label="Top-k (0 = disabled)"
                )

                top_p = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    value=0.92,
                    step=0.02,
                    label="Top-p / Nucleus Sampling"
                )

                repetition_penalty = gr.Slider(
                    minimum=1.0,
                    maximum=2.0,
                    value=1.2,
                    step=0.1,
                    label="Repetition Penalty (higher = less repetition)"
                )

                num_outputs = gr.Slider(
                    minimum=1,
                    maximum=3,
                    value=1,
                    step=1,
                    label="Number of outputs"
                )

            generate_btn = gr.Button("🚀 Generate", variant="primary", size="lg")

        # Right column: generation result
        with gr.Column(scale=1):
            output_text = gr.Textbox(
                label="✨ Generated Text",
                lines=15,
                show_copy_button=True
            )

    # Examples: clicking one fills all seven inputs. cache_examples=False
    # because sampling is non-deterministic, so cached outputs would mislead.
    gr.Markdown("### 💡 Example Prompts")
    gr.Examples(
        examples=[
            ["Machine learning is", 50, 0.8, 40, 0.92, 1.2, 1],
            ["The future of artificial intelligence", 50, 0.8, 40, 0.92, 1.2, 1],
            ["Natural language processing", 50, 0.8, 40, 0.92, 1.2, 1],
            ["In the field of computer science", 50, 0.8, 40, 0.92, 1.2, 1],
            ["Researchers have discovered that", 50, 0.8, 40, 0.92, 1.2, 1],
        ],
        inputs=[prompt_input, max_length, temperature, top_k, top_p, repetition_penalty, num_outputs],
        outputs=output_text,
        fn=generate_text,
        cache_examples=False
    )

    # Connect the button to the generation callback
    generate_btn.click(
        fn=generate_text,
        inputs=[prompt_input, max_length, temperature, top_k, top_p, repetition_penalty, num_outputs],
        outputs=output_text
    )

    # Footer: sampling guidance for users
    gr.Markdown(
        """
        ---
        **Tips for Better Generation:**
        - 🌑️ **Temperature**: Lower (0.5-0.7) = more focused, Higher (1.0-1.5) = more creative
        - 🎯 **Top-k**: Limits vocabulary to top k most likely tokens (try 30-50)
        - 🔬 **Top-p**: Nucleus sampling - keeps smallest set of tokens with cumulative probability > p (try 0.9-0.95)
        - 🔁 **Repetition Penalty**: Higher values (1.2-1.5) reduce repetition (IMPORTANT for this model!)

        **For best results**: Use temperature=0.8, top-k=40, top-p=0.92, repetition_penalty=1.2-1.5
        """
    )
279
+
280
+
281
if __name__ == "__main__":
    # Bind on all interfaces (0.0.0.0) so the app is reachable from outside
    # a container/Space; 7860 is Gradio's conventional default port.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,       # no public Gradio share tunnel
        show_error=True    # surface server-side exceptions in the UI
    )
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ torch
2
+ transformers>=4.30
3
+ datasets
4
+ sentence-transformers
5
+ tokenizers
6
+ huggingface-hub
7
+ gradio
8
+ fastapi
9
+ uvicorn
10
+ matplotlib
11
+ PyQt5