Spaces:
Sleeping
Sleeping
Upload app.py
Browse files
app.py
CHANGED
|
@@ -368,15 +368,31 @@ def generate_text(prompt, max_new_tokens=100, temperature=0.7, top_k=50, top_p=0
|
|
| 368 |
generated_text = '\n'.join(cleaned_lines)
|
| 369 |
|
| 370 |
# If after removing prompt, first line is orphaned dialogue (no speaker), handle it
|
| 371 |
-
|
|
|
|
| 372 |
lines = generated_text.split('\n')
|
| 373 |
first_line = lines[0].strip() if lines else ''
|
| 374 |
-
|
| 375 |
-
if
|
| 376 |
-
#
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 380 |
|
| 381 |
# Fix 1: lowercase followed by uppercase (e.g., "perpetualWith" -> "perpetual With", "AOr" -> "A Or")
|
| 382 |
generated_text = re.sub(r'([a-z])([A-Z])', r'\1 \2', generated_text)
|
|
|
|
| 368 |
generated_text = '\n'.join(cleaned_lines)
|
| 369 |
|
| 370 |
# If after removing prompt, first line is orphaned dialogue (no speaker), handle it
|
| 371 |
+
# Keep removing orphaned dialogue at the start until we find a speaker or valid content
|
| 372 |
+
while generated_text.strip():
|
| 373 |
lines = generated_text.split('\n')
|
| 374 |
first_line = lines[0].strip() if lines else ''
|
| 375 |
+
|
| 376 |
+
if not first_line:
|
| 377 |
+
# Remove empty first line
|
| 378 |
+
generated_text = '\n'.join(lines[1:]) if len(lines) > 1 else ''
|
| 379 |
+
continue
|
| 380 |
+
|
| 381 |
+
# Check if first line is a speaker name
|
| 382 |
+
is_speaker = re.match(r'^([A-Z][A-Z\s]+?):\s*$', first_line) or \
|
| 383 |
+
re.match(r'^([A-Z][a-z]+(?:\s+[a-zA-Z]+)+):\s*$', first_line)
|
| 384 |
+
|
| 385 |
+
if is_speaker:
|
| 386 |
+
# Found a speaker, stop removing
|
| 387 |
+
break
|
| 388 |
+
|
| 389 |
+
# Check if it's orphaned dialogue (starts with capital, has punctuation, but no speaker)
|
| 390 |
+
if re.match(r'^[A-Z]', first_line) and ('.' in first_line or ',' in first_line or '!' in first_line or '?' in first_line):
|
| 391 |
+
# Remove the orphaned first line
|
| 392 |
+
generated_text = '\n'.join(lines[1:]) if len(lines) > 1 else ''
|
| 393 |
+
else:
|
| 394 |
+
# Not clearly orphaned dialogue, stop removing
|
| 395 |
+
break
|
| 396 |
|
| 397 |
# Fix 1: lowercase followed by uppercase (e.g., "perpetualWith" -> "perpetual With", "AOr" -> "A Or")
|
| 398 |
generated_text = re.sub(r'([a-z])([A-Z])', r'\1 \2', generated_text)
|