shwethd committed on
Commit
1e393db
·
verified ·
1 Parent(s): 82f907e

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -27
app.py CHANGED
@@ -361,11 +361,11 @@ def generate_text(prompt, max_new_tokens=100, temperature=0.7, top_k=50, top_p=0
361
  # Fix 1: lowercase followed by uppercase (e.g., "perpetualWith" -> "perpetual With")
362
  generated_text = re.sub(r'([a-z])([A-Z])', r'\1 \2', generated_text)
363
 
364
- # Fix 1b: Fix spacing issues like "furt her" -> "further", "T his" -> "This", "y our" -> "your"
365
  # Remove spaces in the middle of common words
366
  common_words_fix = [
367
  'further', 'this', 'that', 'there', 'where', 'here', 'their', 'your', 'our',
368
- 'man', 'men', 'woman', 'women', 'padua', 'padua', 'content', 'gentle', 'gently',
369
  'house', 'neck', 'car', 'made', 'lost', 'rough', 'see', 'might', 'any', 'one',
370
  'well', 'newly', 'too', 'him', 'her', 'them', 'they', 'the', 'and', 'but',
371
  'for', 'not', 'are', 'was', 'were', 'been', 'have', 'has', 'had', 'will',
@@ -373,24 +373,35 @@ def generate_text(prompt, max_new_tokens=100, temperature=0.7, top_k=50, top_p=0
373
  'you', 'me', 'my', 'his', 'hers', 'its', 'our', 'ours', 'yours', 'theirs',
374
  'into', 'onto', 'upon', 'within', 'without', 'through', 'though', 'although',
375
  'about', 'above', 'below', 'beside', 'between', 'among', 'during', 'before',
376
- 'after', 'while', 'until', 'since', 'because', 'though', 'although'
 
 
377
  ]
378
  for word in common_words_fix:
379
- # Pattern: word split incorrectly (e.g., "furt her", "T his", "y our", "a m an", "Padu a")
380
- # Handle split at any position
381
  word_lower = word.lower()
382
  for i in range(1, len(word_lower)):
383
  # Split at position i: first part + space + second part
384
  first_part = word_lower[:i]
385
  second_part = word_lower[i:]
386
- # Pattern: word split at this position (case insensitive)
 
387
  pattern = r'\b' + first_part + r'\s+' + second_part + r'\b'
388
  generated_text = re.sub(pattern, word, generated_text, flags=re.IGNORECASE)
389
- # Also handle with capital letters (e.g., "Padu a" -> "Padua")
 
390
  pattern_cap = r'\b' + first_part.capitalize() + r'\s+' + second_part + r'\b'
391
  generated_text = re.sub(pattern_cap, word.capitalize(), generated_text)
 
 
392
  pattern_all_cap = r'\b' + first_part.upper() + r'\s+' + second_part.upper() + r'\b'
393
  generated_text = re.sub(pattern_all_cap, word.upper(), generated_text)
 
 
 
 
 
394
 
395
  # Fix 2: Common word boundaries that got merged (e.g., "perpetualwith" -> "perpetual with")
396
  # Add space before common words that might have been merged
@@ -406,10 +417,10 @@ def generate_text(prompt, max_new_tokens=100, temperature=0.7, top_k=50, top_p=0
406
  # Fix 2d: Fix spacing after commas (e.g., "What,bear" -> "What, bear")
407
  generated_text = re.sub(r',([a-zA-Z])', r', \1', generated_text)
408
 
409
- # Fix 2e: Fix merged words that should be separate (e.g., "himt" -> "him to")
410
  # Common patterns where words got merged incorrectly
411
- # Pattern: pronoun + "t" (likely "to" got merged)
412
  merged_fixes = [
 
413
  (r'\bhimt\s+', 'him to '), # "himt me" -> "him to me"
414
  (r'\bhert\s+', 'her to '), # "hert him" -> "her to him"
415
  (r'\bthemt\s+', 'them to '), # "themt us" -> "them to us"
@@ -418,6 +429,12 @@ def generate_text(prompt, max_new_tokens=100, temperature=0.7, top_k=50, top_p=0
418
  (r'\bhert([,.;:!?])', r'her to\1'),
419
  (r'\bthemt([,.;:!?])', r'them to\1'),
420
  (r'\byout([,.;:!?])', r'you to\1'),
 
 
 
 
 
 
421
  ]
422
  for pattern, replacement in merged_fixes:
423
  generated_text = re.sub(pattern, replacement, generated_text, flags=re.IGNORECASE)
@@ -427,6 +444,10 @@ def generate_text(prompt, max_new_tokens=100, temperature=0.7, top_k=50, top_p=0
427
  # But if it's "contenton" -> "content on"
428
  generated_text = re.sub(r'\bcontenton\b', 'content on', generated_text, flags=re.IGNORECASE)
429
 
 
 
 
 
430
  # Fix 2b: Fix contractions that got merged (e.g., "You'llbe" -> "You'll be")
431
  # Add space after contractions before lowercase words
432
  contractions = ["'ll", "'ve", "'re", "'d", "'t", "'s", "'m"]
@@ -643,24 +664,7 @@ with gr.Blocks(title="GPT-2 124M Shakespeare Model") as demo:
643
  )
644
 
645
  # Example prompts with suggested parameters
646
- gr.Markdown("""
647
- ### Example Prompts (Click to try - includes optimal settings)
648
-
649
- **What to Expect:**
650
- - **Character prompts** (e.g., "ROMEO:", "HAMLET:"): Generates dialogue in that character's style, typically starting with their speech
651
- - **Famous quotes** (e.g., "To be or not"): Continues or expands on the quote in Shakespearean style
652
- - **Romantic prompts** (e.g., "JULIET:", "What light through yonder"): Generates romantic dialogue or poetry
653
- - **Speech prompts** (e.g., "Friends, Romans, countrymen"): Generates dramatic speeches
654
-
655
- **Note:** Each example includes pre-configured optimal parameters. The model may generate:
656
- - ✅ Shakespearean-style dialogue with proper speaker names
657
- - ✅ Theatrical language and phrasing
658
- - ⚠️ Some spacing issues (automatically fixed by post-processing)
659
- - ⚠️ Occasional repetition (mitigated by repetition penalty)
660
- - ⚠️ May not always match exact Shakespeare quotes (model is 124M, not trained to memorize)
661
-
662
- **Tip:** Try different examples to see how the model adapts to different character styles and contexts!
663
- """)
664
  examples = gr.Examples(
665
  examples=[
666
  # Format: [prompt, max_tokens, temperature, top_k, top_p, repetition_penalty]
 
361
  # Fix 1: lowercase followed by uppercase (e.g., "perpetualWith" -> "perpetual With")
362
  generated_text = re.sub(r'([a-z])([A-Z])', r'\1 \2', generated_text)
363
 
364
+ # Fix 1b: Fix spacing issues like "furt her" -> "further", "T his" -> "This", "y our" -> "your", "Th at" -> "That"
365
  # Remove spaces in the middle of common words
366
  common_words_fix = [
367
  'further', 'this', 'that', 'there', 'where', 'here', 'their', 'your', 'our',
368
+ 'man', 'men', 'woman', 'women', 'padua', 'content', 'gentle', 'gently',
369
  'house', 'neck', 'car', 'made', 'lost', 'rough', 'see', 'might', 'any', 'one',
370
  'well', 'newly', 'too', 'him', 'her', 'them', 'they', 'the', 'and', 'but',
371
  'for', 'not', 'are', 'was', 'were', 'been', 'have', 'has', 'had', 'will',
 
373
  'you', 'me', 'my', 'his', 'hers', 'its', 'our', 'ours', 'yours', 'theirs',
374
  'into', 'onto', 'upon', 'within', 'without', 'through', 'though', 'although',
375
  'about', 'above', 'below', 'beside', 'between', 'among', 'during', 'before',
376
+ 'after', 'while', 'until', 'since', 'because', 'together', 'honour', 'honor',
377
+ 'already', 'perfect', 'soul', 'way', 'wounds', 'tears', 'raise', 'call',
378
+ 'citizens', 'senator', 'liked', 'cold', 'incold', 'incwold'
379
  ]
380
  for word in common_words_fix:
381
+ # Pattern: word split incorrectly (e.g., "furt her", "T his", "y our", "a m an", "Padu a", "Th at")
382
+ # Handle split at any position, including with capital letters
383
  word_lower = word.lower()
384
  for i in range(1, len(word_lower)):
385
  # Split at position i: first part + space + second part
386
  first_part = word_lower[:i]
387
  second_part = word_lower[i:]
388
+
389
+ # Pattern 1: case-insensitive split, rewritten as the lowercase word (e.g., "furt her" -> "further")
390
  pattern = r'\b' + first_part + r'\s+' + second_part + r'\b'
391
  generated_text = re.sub(pattern, word, generated_text, flags=re.IGNORECASE)
392
+
393
+ # Pattern 2: Capital letter split (e.g., "Th at" -> "That", "T his" -> "This")
394
  pattern_cap = r'\b' + first_part.capitalize() + r'\s+' + second_part + r'\b'
395
  generated_text = re.sub(pattern_cap, word.capitalize(), generated_text)
396
+
397
+ # Pattern 3: All caps split (e.g., "TH AT" -> "THAT")
398
  pattern_all_cap = r'\b' + first_part.upper() + r'\s+' + second_part.upper() + r'\b'
399
  generated_text = re.sub(pattern_all_cap, word.upper(), generated_text)
400
+
401
+ # Pattern 4: Any-case split rewritten with a leading capital (e.g., "tH at" -> "That")
402
+ if len(first_part) > 0:
403
+ pattern_mixed = r'\b' + first_part[0].upper() + first_part[1:] + r'\s+' + second_part + r'\b'
404
+ generated_text = re.sub(pattern_mixed, word.capitalize(), generated_text, flags=re.IGNORECASE)
405
 
406
  # Fix 2: Common word boundaries that got merged (e.g., "perpetualwith" -> "perpetual with")
407
  # Add space before common words that might have been merged
 
417
  # Fix 2d: Fix spacing after commas (e.g., "What,bear" -> "What, bear")
418
  generated_text = re.sub(r',([a-zA-Z])', r', \1', generated_text)
419
 
420
+ # Fix 2e: Fix merged words that should be separate (e.g., "himt" -> "him to", "incwold" -> "in cold")
421
  # Common patterns where words got merged incorrectly
 
422
  merged_fixes = [
423
+ # Pronoun + "t" (likely "to" got merged)
424
  (r'\bhimt\s+', 'him to '), # "himt me" -> "him to me"
425
  (r'\bhert\s+', 'her to '), # "hert him" -> "her to him"
426
  (r'\bthemt\s+', 'them to '), # "themt us" -> "them to us"
 
429
  (r'\bhert([,.;:!?])', r'her to\1'),
430
  (r'\bthemt([,.;:!?])', r'them to\1'),
431
  (r'\byout([,.;:!?])', r'you to\1'),
432
+ # Other merged patterns
433
+ (r'\bincwold\b', 'in cold'), # "incwold" -> "in cold"
434
+ (r'\bincold\b', 'in cold'), # "incold" -> "in cold"
435
+ (r'\blikeled\b', 'liked'), # "likeled" -> "liked" (or could be "like led" but "liked" is more common)
436
+ (r'\bh\s+on\s+our\b', 'honour'), # "h on our" -> "honour"
437
+ (r'\bh\s+on\s+or\b', 'honor'), # "h on or" -> "honor" (American spelling)
438
  ]
439
  for pattern, replacement in merged_fixes:
440
  generated_text = re.sub(pattern, replacement, generated_text, flags=re.IGNORECASE)
 
444
  # But if it's "contenton" -> "content on"
445
  generated_text = re.sub(r'\bcontenton\b', 'content on', generated_text, flags=re.IGNORECASE)
446
 
447
+ # Fix 2g: Fix "toget her" -> "together" (but be careful - "get her" is also valid)
448
+ # Only fix if it's clearly "together" (context-dependent, but "toget her" is likely "together")
449
+ generated_text = re.sub(r'\btoget\s+her\b', 'together', generated_text, flags=re.IGNORECASE)
450
+
451
  # Fix 2b: Fix contractions that got merged (e.g., "You'llbe" -> "You'll be")
452
  # Add space after contractions before lowercase words
453
  contractions = ["'ll", "'ve", "'re", "'d", "'t", "'s", "'m"]
 
664
  )
665
 
666
  # Example prompts with suggested parameters
667
+ gr.Markdown("### Example Prompts (Click to try - includes optimal settings)")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
668
  examples = gr.Examples(
669
  examples=[
670
  # Format: [prompt, max_tokens, temperature, top_k, top_p, repetition_penalty]