Spaces:

shwethd
/

DecoderModel124M

Sleeping

App Files Community

shwethd committed on Nov 14, 2025

Commit

1e393db

verified ·

1 Parent(s): 82f907e

Upload app.py

Browse files

Files changed (1) hide show

app.py +31 -27

app.py CHANGED Viewed

@@ -361,11 +361,11 @@ def generate_text(prompt, max_new_tokens=100, temperature=0.7, top_k=50, top_p=0
         # Fix 1: lowercase followed by uppercase (e.g., "perpetualWith" -> "perpetual With")
         generated_text = re.sub(r'([a-z])([A-Z])', r'\1 \2', generated_text)
-        # Fix 1b: Fix spacing issues like "furt her" -> "further", "T his" -> "This", "y our" -> "your"
         # Remove spaces in the middle of common words
         common_words_fix = [
             'further', 'this', 'that', 'there', 'where', 'here', 'their', 'your', 'our',
-            'man', 'men', 'woman', 'women', 'padua', 'padua', 'content', 'gentle', 'gently',
             'house', 'neck', 'car', 'made', 'lost', 'rough', 'see', 'might', 'any', 'one',
             'well', 'newly', 'too', 'him', 'her', 'them', 'they', 'the', 'and', 'but',
             'for', 'not', 'are', 'was', 'were', 'been', 'have', 'has', 'had', 'will',
@@ -373,24 +373,35 @@ def generate_text(prompt, max_new_tokens=100, temperature=0.7, top_k=50, top_p=0
             'you', 'me', 'my', 'his', 'hers', 'its', 'our', 'ours', 'yours', 'theirs',
             'into', 'onto', 'upon', 'within', 'without', 'through', 'though', 'although',
             'about', 'above', 'below', 'beside', 'between', 'among', 'during', 'before',
-            'after', 'while', 'until', 'since', 'because', 'though', 'although'
         ]
         for word in common_words_fix:
-            # Pattern: word split incorrectly (e.g., "furt her", "T his", "y our", "a m an", "Padu a")
-            # Handle split at any position
             word_lower = word.lower()
             for i in range(1, len(word_lower)):
                 # Split at position i: first part + space + second part
                 first_part = word_lower[:i]
                 second_part = word_lower[i:]
-                # Pattern: word split at this position (case insensitive)
                 pattern = r'\b' + first_part + r'\s+' + second_part + r'\b'
                 generated_text = re.sub(pattern, word, generated_text, flags=re.IGNORECASE)
-                # Also handle with capital letters (e.g., "Padu a" -> "Padua")
                 pattern_cap = r'\b' + first_part.capitalize() + r'\s+' + second_part + r'\b'
                 generated_text = re.sub(pattern_cap, word.capitalize(), generated_text)
                 pattern_all_cap = r'\b' + first_part.upper() + r'\s+' + second_part.upper() + r'\b'
                 generated_text = re.sub(pattern_all_cap, word.upper(), generated_text)
         # Fix 2: Common word boundaries that got merged (e.g., "perpetualwith" -> "perpetual with")
         # Add space before common words that might have been merged
@@ -406,10 +417,10 @@ def generate_text(prompt, max_new_tokens=100, temperature=0.7, top_k=50, top_p=0
         # Fix 2d: Fix spacing after commas (e.g., "What,bear" -> "What, bear")
         generated_text = re.sub(r',([a-zA-Z])', r', \1', generated_text)
-        # Fix 2e: Fix merged words that should be separate (e.g., "himt" -> "him to")
         # Common patterns where words got merged incorrectly
-        # Pattern: pronoun + "t" (likely "to" got merged)
         merged_fixes = [
             (r'\bhimt\s+', 'him to '),  # "himt me" -> "him to me"
             (r'\bhert\s+', 'her to '),  # "hert him" -> "her to him"
             (r'\bthemt\s+', 'them to '),  # "themt us" -> "them to us"
@@ -418,6 +429,12 @@ def generate_text(prompt, max_new_tokens=100, temperature=0.7, top_k=50, top_p=0
             (r'\bhert([,.;:!?])', r'her to\1'),
             (r'\bthemt([,.;:!?])', r'them to\1'),
             (r'\byout([,.;:!?])', r'you to\1'),
         ]
         for pattern, replacement in merged_fixes:
             generated_text = re.sub(pattern, replacement, generated_text, flags=re.IGNORECASE)
@@ -427,6 +444,10 @@ def generate_text(prompt, max_new_tokens=100, temperature=0.7, top_k=50, top_p=0
         # But if it's "contenton" -> "content on"
         generated_text = re.sub(r'\bcontenton\b', 'content on', generated_text, flags=re.IGNORECASE)
         # Fix 2b: Fix contractions that got merged (e.g., "You'llbe" -> "You'll be")
         # Add space after contractions before lowercase words
         contractions = ["'ll", "'ve", "'re", "'d", "'t", "'s", "'m"]
@@ -643,24 +664,7 @@ with gr.Blocks(title="GPT-2 124M Shakespeare Model") as demo:
             )
     # Example prompts with suggested parameters
-    gr.Markdown("""
-    ### Example Prompts (Click to try - includes optimal settings)
-    **What to Expect:**
-    - **Character prompts** (e.g., "ROMEO:", "HAMLET:"): Generates dialogue in that character's style, typically starting with their speech
-    - **Famous quotes** (e.g., "To be or not"): Continues or expands on the quote in Shakespearean style
-    - **Romantic prompts** (e.g., "JULIET:", "What light through yonder"): Generates romantic dialogue or poetry
-    - **Speech prompts** (e.g., "Friends, Romans, countrymen"): Generates dramatic speeches
-    **Note:** Each example includes pre-configured optimal parameters. The model may generate:
-    - ✅ Shakespearean-style dialogue with proper speaker names
-    - ✅ Theatrical language and phrasing
-    - ⚠️ Some spacing issues (automatically fixed by post-processing)
-    - ⚠️ Occasional repetition (mitigated by repetition penalty)
-    - ⚠️ May not always match exact Shakespeare quotes (model is 124M, not trained to memorize)
-    **Tip:** Try different examples to see how the model adapts to different character styles and contexts!
-    """)
     examples = gr.Examples(
         examples=[
             # Format: [prompt, max_tokens, temperature, top_k, top_p, repetition_penalty]

         # Fix 1: lowercase followed by uppercase (e.g., "perpetualWith" -> "perpetual With")
         generated_text = re.sub(r'([a-z])([A-Z])', r'\1 \2', generated_text)
+        # Fix 1b: Fix spacing issues like "furt her" -> "further", "T his" -> "This", "y our" -> "your", "Th at" -> "That"
         # Remove spaces in the middle of common words
         common_words_fix = [
             'further', 'this', 'that', 'there', 'where', 'here', 'their', 'your', 'our',
+            'man', 'men', 'woman', 'women', 'padua', 'content', 'gentle', 'gently',
             'house', 'neck', 'car', 'made', 'lost', 'rough', 'see', 'might', 'any', 'one',
             'well', 'newly', 'too', 'him', 'her', 'them', 'they', 'the', 'and', 'but',
             'for', 'not', 'are', 'was', 'were', 'been', 'have', 'has', 'had', 'will',
             'you', 'me', 'my', 'his', 'hers', 'its', 'our', 'ours', 'yours', 'theirs',
             'into', 'onto', 'upon', 'within', 'without', 'through', 'though', 'although',
             'about', 'above', 'below', 'beside', 'between', 'among', 'during', 'before',
+            'after', 'while', 'until', 'since', 'because', 'together', 'honour', 'honor',
+            'already', 'perfect', 'soul', 'way', 'wounds', 'tears', 'raise', 'call',
+            'citizens', 'senator', 'liked', 'cold', 'incold', 'incwold'
         ]
         for word in common_words_fix:
+            # Pattern: word split incorrectly (e.g., "furt her", "T his", "y our", "a m an", "Padu a", "Th at")
+            # Handle split at any position, including with capital letters
             word_lower = word.lower()
             for i in range(1, len(word_lower)):
                 # Split at position i: first part + space + second part
                 first_part = word_lower[:i]
                 second_part = word_lower[i:]
+                # Pattern 1: lowercase split (e.g., "furt her" -> "further")
                 pattern = r'\b' + first_part + r'\s+' + second_part + r'\b'
                 generated_text = re.sub(pattern, word, generated_text, flags=re.IGNORECASE)
+                # Pattern 2: Capital letter split (e.g., "Th at" -> "That", "T his" -> "This")
                 pattern_cap = r'\b' + first_part.capitalize() + r'\s+' + second_part + r'\b'
                 generated_text = re.sub(pattern_cap, word.capitalize(), generated_text)
+                # Pattern 3: All caps split (e.g., "TH AT" -> "THAT")
                 pattern_all_cap = r'\b' + first_part.upper() + r'\s+' + second_part.upper() + r'\b'
                 generated_text = re.sub(pattern_all_cap, word.upper(), generated_text)
+                # Pattern 4: Mixed case with capital in first part (e.g., "Th at" -> "That")
+                if len(first_part) > 0:
+                    pattern_mixed = r'\b' + first_part[0].upper() + first_part[1:] + r'\s+' + second_part + r'\b'
+                    generated_text = re.sub(pattern_mixed, word.capitalize(), generated_text, flags=re.IGNORECASE)
         # Fix 2: Common word boundaries that got merged (e.g., "perpetualwith" -> "perpetual with")
         # Add space before common words that might have been merged
         # Fix 2d: Fix spacing after commas (e.g., "What,bear" -> "What, bear")
         generated_text = re.sub(r',([a-zA-Z])', r', \1', generated_text)
+        # Fix 2e: Fix merged words that should be separate (e.g., "himt" -> "him to", "incwold" -> "in cold")
         # Common patterns where words got merged incorrectly
         merged_fixes = [
+            # Pronoun + "t" (likely "to" got merged)
             (r'\bhimt\s+', 'him to '),  # "himt me" -> "him to me"
             (r'\bhert\s+', 'her to '),  # "hert him" -> "her to him"
             (r'\bthemt\s+', 'them to '),  # "themt us" -> "them to us"
             (r'\bhert([,.;:!?])', r'her to\1'),
             (r'\bthemt([,.;:!?])', r'them to\1'),
             (r'\byout([,.;:!?])', r'you to\1'),
+            # Other merged patterns
+            (r'\bincwold\b', 'in cold'),  # "incwold" -> "in cold"
+            (r'\bincold\b', 'in cold'),  # "incold" -> "in cold"
+            (r'\blikeled\b', 'liked'),  # "likeled" -> "liked" (or could be "like led" but "liked" is more common)
+            (r'\bh\s+on\s+our\b', 'honour'),  # "h on our" -> "honour"
+            (r'\bh\s+on\s+or\b', 'honor'),  # "h on or" -> "honor" (American spelling)
         ]
         for pattern, replacement in merged_fixes:
             generated_text = re.sub(pattern, replacement, generated_text, flags=re.IGNORECASE)
         # But if it's "contenton" -> "content on"
         generated_text = re.sub(r'\bcontenton\b', 'content on', generated_text, flags=re.IGNORECASE)
+        # Fix 2g: Fix "toget her" -> "together" (but be careful - "get her" is also valid)
+        # Only fix if it's clearly "together" (context-dependent, but "toget her" is likely "together")
+        generated_text = re.sub(r'\btoget\s+her\b', 'together', generated_text, flags=re.IGNORECASE)
         # Fix 2b: Fix contractions that got merged (e.g., "You'llbe" -> "You'll be")
         # Add space after contractions before lowercase words
         contractions = ["'ll", "'ve", "'re", "'d", "'t", "'s", "'m"]
             )
     # Example prompts with suggested parameters
+    gr.Markdown("### Example Prompts (Click to try - includes optimal settings)")
     examples = gr.Examples(
         examples=[
             # Format: [prompt, max_tokens, temperature, top_k, top_p, repetition_penalty]