Spaces:
Sleeping
Sleeping
new linear approach
Browse files
app.py
CHANGED
|
@@ -25,46 +25,60 @@ def insert_points(text):
|
|
| 25 |
text = '\n' + text
|
| 26 |
|
| 27 |
# -----------------------------
|
| 28 |
-
# 2)
|
| 29 |
-
#
|
| 30 |
# -----------------------------
|
| 31 |
-
def heading_replacer(m):
|
| 32 |
-
nonlocal counter
|
| 33 |
-
heading = m.group(1) # The entire heading including hash symbols
|
| 34 |
-
tag = f"[INSERT_POINT_{counter:03d}]\n"
|
| 35 |
-
counter += 1
|
| 36 |
-
return f"\n{tag}{heading}"
|
| 37 |
|
| 38 |
-
#
|
| 39 |
-
|
|
|
|
| 40 |
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
|
| 51 |
-
#
|
| 52 |
-
text =
|
| 53 |
|
| 54 |
# -----------------------------
|
| 55 |
-
#
|
| 56 |
# -----------------------------
|
| 57 |
-
if not text.startswith('
|
| 58 |
-
text = f"
|
| 59 |
counter += 1
|
| 60 |
|
| 61 |
# -----------------------------
|
| 62 |
-
#
|
| 63 |
# -----------------------------
|
| 64 |
# Remove extra blank lines before tags
|
| 65 |
text = re.sub(r'\n\n+(\[INSERT_POINT_)', r'\n\1', text)
|
| 66 |
# Remove extra blank lines at the beginning
|
| 67 |
text = re.sub(r'^\n+', '', text)
|
|
|
|
|
|
|
| 68 |
|
| 69 |
return text
|
| 70 |
|
|
@@ -83,10 +97,10 @@ demo = gr.Interface(
|
|
| 83 |
title="Insert Point Tagger",
|
| 84 |
description=(
|
| 85 |
"This processor:\n"
|
| 86 |
-
"1)
|
| 87 |
-
"2)
|
| 88 |
-
"3)
|
| 89 |
-
"4)
|
| 90 |
),
|
| 91 |
)
|
| 92 |
|
|
|
|
| 25 |
text = '\n' + text
|
| 26 |
|
| 27 |
# -----------------------------
|
| 28 |
+
# 2) SPLIT TEXT INTO SEGMENTS AND TAG
|
| 29 |
+
# Process the text in a single pass to avoid duplicate tags
|
| 30 |
# -----------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
|
| 32 |
+
# Split the text into segments (paragraphs and headings)
|
| 33 |
+
segments = re.split(r'(\n#+\s+.+?(?=\n|$)|\n)', text)
|
| 34 |
+
segments = [s for s in segments if s] # Remove empty segments
|
| 35 |
|
| 36 |
+
result = []
|
| 37 |
+
previous_was_tag = False
|
| 38 |
+
|
| 39 |
+
for segment in segments:
|
| 40 |
+
# If this is a newline, add a tag (but not after another tag)
|
| 41 |
+
if segment == '\n':
|
| 42 |
+
if not previous_was_tag:
|
| 43 |
+
result.append(f"\n[INSERT_POINT_{counter:03d}]\n")
|
| 44 |
+
counter += 1
|
| 45 |
+
previous_was_tag = True
|
| 46 |
+
else:
|
| 47 |
+
result.append('\n') # Just add the newline without a tag
|
| 48 |
+
|
| 49 |
+
# If this is a heading, add a tag before it
|
| 50 |
+
elif segment.startswith('\n#'):
|
| 51 |
+
if not previous_was_tag:
|
| 52 |
+
result.append(f"\n[INSERT_POINT_{counter:03d}]")
|
| 53 |
+
counter += 1
|
| 54 |
+
previous_was_tag = True
|
| 55 |
+
result.append(segment)
|
| 56 |
+
previous_was_tag = False
|
| 57 |
+
|
| 58 |
+
# Regular text segment
|
| 59 |
+
else:
|
| 60 |
+
result.append(segment)
|
| 61 |
+
previous_was_tag = False
|
| 62 |
|
| 63 |
+
# Join all segments back together
|
| 64 |
+
text = ''.join(result)
|
| 65 |
|
| 66 |
# -----------------------------
|
| 67 |
+
# 3) ADD TAG AT THE BEGINNING IF NEEDED
|
| 68 |
# -----------------------------
|
| 69 |
+
if not text.startswith('[INSERT_POINT_'):
|
| 70 |
+
text = f"[INSERT_POINT_{counter:03d}]\n" + text
|
| 71 |
counter += 1
|
| 72 |
|
| 73 |
# -----------------------------
|
| 74 |
+
# 4) CLEAN UP: Remove excess newlines
|
| 75 |
# -----------------------------
|
| 76 |
# Remove extra blank lines before tags
|
| 77 |
text = re.sub(r'\n\n+(\[INSERT_POINT_)', r'\n\1', text)
|
| 78 |
# Remove extra blank lines at the beginning
|
| 79 |
text = re.sub(r'^\n+', '', text)
|
| 80 |
+
# Ensure no consecutive tags
|
| 81 |
+
text = re.sub(r'(\[INSERT_POINT_\d{3}]\n)\s*\[INSERT_POINT_\d{3}]', r'\1', text)
|
| 82 |
|
| 83 |
return text
|
| 84 |
|
|
|
|
| 97 |
title="Insert Point Tagger",
|
| 98 |
description=(
|
| 99 |
"This processor:\n"
|
| 100 |
+
"1) Tags headings and paragraph breaks with sequential numbers.\n"
|
| 101 |
+
"2) Places each tag on its own line.\n"
|
| 102 |
+
"3) Ensures consistent, sequential numbering (001, 002, etc.).\n"
|
| 103 |
+
"4) Avoids consecutive tags - never two tags in a row."
|
| 104 |
),
|
| 105 |
)
|
| 106 |
|