Spaces:
Sleeping
Sleeping
Upload app.py
Browse files
app.py
CHANGED
|
@@ -361,11 +361,11 @@ def generate_text(prompt, max_new_tokens=100, temperature=0.7, top_k=50, top_p=0
|
|
| 361 |
# Fix 1: lowercase followed by uppercase (e.g., "perpetualWith" -> "perpetual With")
|
| 362 |
generated_text = re.sub(r'([a-z])([A-Z])', r'\1 \2', generated_text)
|
| 363 |
|
| 364 |
-
# Fix 1b: Fix spacing issues like "furt her" -> "further", "T his" -> "This", "y our" -> "your"
|
| 365 |
# Remove spaces in the middle of common words
|
| 366 |
common_words_fix = [
|
| 367 |
'further', 'this', 'that', 'there', 'where', 'here', 'their', 'your', 'our',
|
| 368 |
-
'man', 'men', 'woman', 'women', 'padua', '
|
| 369 |
'house', 'neck', 'car', 'made', 'lost', 'rough', 'see', 'might', 'any', 'one',
|
| 370 |
'well', 'newly', 'too', 'him', 'her', 'them', 'they', 'the', 'and', 'but',
|
| 371 |
'for', 'not', 'are', 'was', 'were', 'been', 'have', 'has', 'had', 'will',
|
|
@@ -373,24 +373,35 @@ def generate_text(prompt, max_new_tokens=100, temperature=0.7, top_k=50, top_p=0
|
|
| 373 |
'you', 'me', 'my', 'his', 'hers', 'its', 'our', 'ours', 'yours', 'theirs',
|
| 374 |
'into', 'onto', 'upon', 'within', 'without', 'through', 'though', 'although',
|
| 375 |
'about', 'above', 'below', 'beside', 'between', 'among', 'during', 'before',
|
| 376 |
-
'after', 'while', 'until', 'since', 'because', '
|
|
|
|
|
|
|
| 377 |
]
|
| 378 |
for word in common_words_fix:
|
| 379 |
-
# Pattern: word split incorrectly (e.g., "furt her", "T his", "y our", "a m an", "Padu a")
|
| 380 |
-
# Handle split at any position
|
| 381 |
word_lower = word.lower()
|
| 382 |
for i in range(1, len(word_lower)):
|
| 383 |
# Split at position i: first part + space + second part
|
| 384 |
first_part = word_lower[:i]
|
| 385 |
second_part = word_lower[i:]
|
| 386 |
-
|
|
|
|
| 387 |
pattern = r'\b' + first_part + r'\s+' + second_part + r'\b'
|
| 388 |
generated_text = re.sub(pattern, word, generated_text, flags=re.IGNORECASE)
|
| 389 |
-
|
|
|
|
| 390 |
pattern_cap = r'\b' + first_part.capitalize() + r'\s+' + second_part + r'\b'
|
| 391 |
generated_text = re.sub(pattern_cap, word.capitalize(), generated_text)
|
|
|
|
|
|
|
| 392 |
pattern_all_cap = r'\b' + first_part.upper() + r'\s+' + second_part.upper() + r'\b'
|
| 393 |
generated_text = re.sub(pattern_all_cap, word.upper(), generated_text)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 394 |
|
| 395 |
# Fix 2: Common word boundaries that got merged (e.g., "perpetualwith" -> "perpetual with")
|
| 396 |
# Add space before common words that might have been merged
|
|
@@ -406,10 +417,10 @@ def generate_text(prompt, max_new_tokens=100, temperature=0.7, top_k=50, top_p=0
|
|
| 406 |
# Fix 2d: Fix spacing after commas (e.g., "What,bear" -> "What, bear")
|
| 407 |
generated_text = re.sub(r',([a-zA-Z])', r', \1', generated_text)
|
| 408 |
|
| 409 |
-
# Fix 2e: Fix merged words that should be separate (e.g., "himt" -> "him to")
|
| 410 |
# Common patterns where words got merged incorrectly
|
| 411 |
-
# Pattern: pronoun + "t" (likely "to" got merged)
|
| 412 |
merged_fixes = [
|
|
|
|
| 413 |
(r'\bhimt\s+', 'him to '), # "himt me" -> "him to me"
|
| 414 |
(r'\bhert\s+', 'her to '), # "hert him" -> "her to him"
|
| 415 |
(r'\bthemt\s+', 'them to '), # "themt us" -> "them to us"
|
|
@@ -418,6 +429,12 @@ def generate_text(prompt, max_new_tokens=100, temperature=0.7, top_k=50, top_p=0
|
|
| 418 |
(r'\bhert([,.;:!?])', r'her to\1'),
|
| 419 |
(r'\bthemt([,.;:!?])', r'them to\1'),
|
| 420 |
(r'\byout([,.;:!?])', r'you to\1'),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 421 |
]
|
| 422 |
for pattern, replacement in merged_fixes:
|
| 423 |
generated_text = re.sub(pattern, replacement, generated_text, flags=re.IGNORECASE)
|
|
@@ -427,6 +444,10 @@ def generate_text(prompt, max_new_tokens=100, temperature=0.7, top_k=50, top_p=0
|
|
| 427 |
# But if it's "contenton" -> "content on"
|
| 428 |
generated_text = re.sub(r'\bcontenton\b', 'content on', generated_text, flags=re.IGNORECASE)
|
| 429 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 430 |
# Fix 2b: Fix contractions that got merged (e.g., "You'llbe" -> "You'll be")
|
| 431 |
# Add space after contractions before lowercase words
|
| 432 |
contractions = ["'ll", "'ve", "'re", "'d", "'t", "'s", "'m"]
|
|
@@ -643,24 +664,7 @@ with gr.Blocks(title="GPT-2 124M Shakespeare Model") as demo:
|
|
| 643 |
)
|
| 644 |
|
| 645 |
# Example prompts with suggested parameters
|
| 646 |
-
gr.Markdown(""
|
| 647 |
-
### Example Prompts (Click to try - includes optimal settings)
|
| 648 |
-
|
| 649 |
-
**What to Expect:**
|
| 650 |
-
- **Character prompts** (e.g., "ROMEO:", "HAMLET:"): Generates dialogue in that character's style, typically starting with their speech
|
| 651 |
-
- **Famous quotes** (e.g., "To be or not"): Continues or expands on the quote in Shakespearean style
|
| 652 |
-
- **Romantic prompts** (e.g., "JULIET:", "What light through yonder"): Generates romantic dialogue or poetry
|
| 653 |
-
- **Speech prompts** (e.g., "Friends, Romans, countrymen"): Generates dramatic speeches
|
| 654 |
-
|
| 655 |
-
**Note:** Each example includes pre-configured optimal parameters. The model may generate:
|
| 656 |
-
- ✅ Shakespearean-style dialogue with proper speaker names
|
| 657 |
-
- ✅ Theatrical language and phrasing
|
| 658 |
-
- ⚠️ Some spacing issues (automatically fixed by post-processing)
|
| 659 |
-
- ⚠️ Occasional repetition (mitigated by repetition penalty)
|
| 660 |
-
- ⚠️ May not always match exact Shakespeare quotes (model is 124M, not trained to memorize)
|
| 661 |
-
|
| 662 |
-
**Tip:** Try different examples to see how the model adapts to different character styles and contexts!
|
| 663 |
-
""")
|
| 664 |
examples = gr.Examples(
|
| 665 |
examples=[
|
| 666 |
# Format: [prompt, max_tokens, temperature, top_k, top_p, repetition_penalty]
|
|
|
|
| 361 |
# Fix 1: lowercase followed by uppercase (e.g., "perpetualWith" -> "perpetual With")
|
| 362 |
generated_text = re.sub(r'([a-z])([A-Z])', r'\1 \2', generated_text)
|
| 363 |
|
| 364 |
+
# Fix 1b: Fix spacing issues like "furt her" -> "further", "T his" -> "This", "y our" -> "your", "Th at" -> "That"
|
| 365 |
# Remove spaces in the middle of common words
|
| 366 |
common_words_fix = [
|
| 367 |
'further', 'this', 'that', 'there', 'where', 'here', 'their', 'your', 'our',
|
| 368 |
+
'man', 'men', 'woman', 'women', 'padua', 'content', 'gentle', 'gently',
|
| 369 |
'house', 'neck', 'car', 'made', 'lost', 'rough', 'see', 'might', 'any', 'one',
|
| 370 |
'well', 'newly', 'too', 'him', 'her', 'them', 'they', 'the', 'and', 'but',
|
| 371 |
'for', 'not', 'are', 'was', 'were', 'been', 'have', 'has', 'had', 'will',
|
|
|
|
| 373 |
'you', 'me', 'my', 'his', 'hers', 'its', 'our', 'ours', 'yours', 'theirs',
|
| 374 |
'into', 'onto', 'upon', 'within', 'without', 'through', 'though', 'although',
|
| 375 |
'about', 'above', 'below', 'beside', 'between', 'among', 'during', 'before',
|
| 376 |
+
'after', 'while', 'until', 'since', 'because', 'together', 'honour', 'honor',
|
| 377 |
+
'already', 'perfect', 'soul', 'way', 'wounds', 'tears', 'raise', 'call',
|
| 378 |
+
'citizens', 'senator', 'liked', 'cold', 'incold', 'incwold'
|
| 379 |
]
|
| 380 |
for word in common_words_fix:
|
| 381 |
+
# Pattern: word split incorrectly (e.g., "furt her", "T his", "y our", "a m an", "Padu a", "Th at")
|
| 382 |
+
# Handle split at any position, including with capital letters
|
| 383 |
word_lower = word.lower()
|
| 384 |
for i in range(1, len(word_lower)):
|
| 385 |
# Split at position i: first part + space + second part
|
| 386 |
first_part = word_lower[:i]
|
| 387 |
second_part = word_lower[i:]
|
| 388 |
+
|
| 389 |
+
# Pattern 1: lowercase split (e.g., "furt her" -> "further")
|
| 390 |
pattern = r'\b' + first_part + r'\s+' + second_part + r'\b'
|
| 391 |
generated_text = re.sub(pattern, word, generated_text, flags=re.IGNORECASE)
|
| 392 |
+
|
| 393 |
+
# Pattern 2: Capital letter split (e.g., "Th at" -> "That", "T his" -> "This")
|
| 394 |
pattern_cap = r'\b' + first_part.capitalize() + r'\s+' + second_part + r'\b'
|
| 395 |
generated_text = re.sub(pattern_cap, word.capitalize(), generated_text)
|
| 396 |
+
|
| 397 |
+
# Pattern 3: All caps split (e.g., "TH AT" -> "THAT")
|
| 398 |
pattern_all_cap = r'\b' + first_part.upper() + r'\s+' + second_part.upper() + r'\b'
|
| 399 |
generated_text = re.sub(pattern_all_cap, word.upper(), generated_text)
|
| 400 |
+
|
| 401 |
+
# Pattern 4: Mixed case with capital in first part (e.g., "Th at" -> "That")
|
| 402 |
+
if len(first_part) > 0:
|
| 403 |
+
pattern_mixed = r'\b' + first_part[0].upper() + first_part[1:] + r'\s+' + second_part + r'\b'
|
| 404 |
+
generated_text = re.sub(pattern_mixed, word.capitalize(), generated_text, flags=re.IGNORECASE)
|
| 405 |
|
| 406 |
# Fix 2: Common word boundaries that got merged (e.g., "perpetualwith" -> "perpetual with")
|
| 407 |
# Add space before common words that might have been merged
|
|
|
|
| 417 |
# Fix 2d: Fix spacing after commas (e.g., "What,bear" -> "What, bear")
|
| 418 |
generated_text = re.sub(r',([a-zA-Z])', r', \1', generated_text)
|
| 419 |
|
| 420 |
+
# Fix 2e: Fix merged words that should be separate (e.g., "himt" -> "him to", "incwold" -> "in cold")
|
| 421 |
# Common patterns where words got merged incorrectly
|
|
|
|
| 422 |
merged_fixes = [
|
| 423 |
+
# Pronoun + "t" (likely "to" got merged)
|
| 424 |
(r'\bhimt\s+', 'him to '), # "himt me" -> "him to me"
|
| 425 |
(r'\bhert\s+', 'her to '), # "hert him" -> "her to him"
|
| 426 |
(r'\bthemt\s+', 'them to '), # "themt us" -> "them to us"
|
|
|
|
| 429 |
(r'\bhert([,.;:!?])', r'her to\1'),
|
| 430 |
(r'\bthemt([,.;:!?])', r'them to\1'),
|
| 431 |
(r'\byout([,.;:!?])', r'you to\1'),
|
| 432 |
+
# Other merged patterns
|
| 433 |
+
(r'\bincwold\b', 'in cold'), # "incwold" -> "in cold"
|
| 434 |
+
(r'\bincold\b', 'in cold'), # "incold" -> "in cold"
|
| 435 |
+
(r'\blikeled\b', 'liked'), # "likeled" -> "liked" (or could be "like led" but "liked" is more common)
|
| 436 |
+
(r'\bh\s+on\s+our\b', 'honour'), # "h on our" -> "honour"
|
| 437 |
+
(r'\bh\s+on\s+or\b', 'honor'), # "h on or" -> "honor" (American spelling)
|
| 438 |
]
|
| 439 |
for pattern, replacement in merged_fixes:
|
| 440 |
generated_text = re.sub(pattern, replacement, generated_text, flags=re.IGNORECASE)
|
|
|
|
| 444 |
# But if it's "contenton" -> "content on"
|
| 445 |
generated_text = re.sub(r'\bcontenton\b', 'content on', generated_text, flags=re.IGNORECASE)
|
| 446 |
|
| 447 |
+
# Fix 2g: Fix "toget her" -> "together" (but be careful - "get her" is also valid)
|
| 448 |
+
# Only fix if it's clearly "together" (context-dependent, but "toget her" is likely "together")
|
| 449 |
+
generated_text = re.sub(r'\btoget\s+her\b', 'together', generated_text, flags=re.IGNORECASE)
|
| 450 |
+
|
| 451 |
# Fix 2b: Fix contractions that got merged (e.g., "You'llbe" -> "You'll be")
|
| 452 |
# Add space after contractions before lowercase words
|
| 453 |
contractions = ["'ll", "'ve", "'re", "'d", "'t", "'s", "'m"]
|
|
|
|
| 664 |
)
|
| 665 |
|
| 666 |
# Example prompts with suggested parameters
|
| 667 |
+
gr.Markdown("### Example Prompts (Click to try - includes optimal settings)")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 668 |
examples = gr.Examples(
|
| 669 |
examples=[
|
| 670 |
# Format: [prompt, max_tokens, temperature, top_k, top_p, repetition_penalty]
|