Spaces:
Sleeping
Sleeping
new approach
Browse files
app.py
CHANGED
|
@@ -1,49 +1,44 @@
|
|
| 1 |
import re
|
| 2 |
import gradio as gr
|
| 3 |
|
| 4 |
-
|
| 5 |
def insert_points(text):
|
| 6 |
-
#
|
| 7 |
-
lines = text.split('\n')
|
| 8 |
-
|
| 9 |
counter = 1
|
| 10 |
-
output = []
|
| 11 |
-
|
| 12 |
-
for line in lines:
|
| 13 |
-
# This pattern captures runs of one or more '#' plus any trailing whitespace (\s*), not just spaces
|
| 14 |
-
# We'll split the line on these sequences, but keep them so we can re-insert with tags.
|
| 15 |
-
segments = re.split(r'(\#+\s*)', line)
|
| 16 |
-
|
| 17 |
-
# We'll rebuild this line piece by piece
|
| 18 |
-
rebuilt_line = []
|
| 19 |
-
|
| 20 |
-
for seg in segments:
|
| 21 |
-
if re.match(r'^\#+\s*$', seg):
|
| 22 |
-
# This segment is a run of '#' (with optional trailing space)
|
| 23 |
-
# Insert a tag BEFORE it
|
| 24 |
-
tag = f"[INSERT_POINT_{counter:03d}]"
|
| 25 |
-
counter += 1
|
| 26 |
-
rebuilt_line.append(tag)
|
| 27 |
-
|
| 28 |
-
# Then append the actual '#' segment
|
| 29 |
-
rebuilt_line.append(seg)
|
| 30 |
-
else:
|
| 31 |
-
# Normal text
|
| 32 |
-
rebuilt_line.append(seg)
|
| 33 |
-
|
| 34 |
-
# After processing all segments in this line,
|
| 35 |
-
# append a final tag to match your "insert a tag after every line" requirement
|
| 36 |
-
line_tag = f"[INSERT_POINT_{counter:03d}]"
|
| 37 |
-
counter += 1
|
| 38 |
-
rebuilt_line.append(line_tag)
|
| 39 |
-
|
| 40 |
-
# Join the processed segments for this line
|
| 41 |
-
output.append("".join(rebuilt_line))
|
| 42 |
-
|
| 43 |
-
# Finally, join all lines with newlines
|
| 44 |
-
return "\n".join(output)
|
| 45 |
-
|
| 46 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
demo = gr.Interface(
|
| 48 |
fn=insert_points,
|
| 49 |
inputs=gr.Textbox(
|
|
@@ -51,13 +46,14 @@ demo = gr.Interface(
|
|
| 51 |
placeholder="Paste your text here...",
|
| 52 |
label="Your Input Text"
|
| 53 |
),
|
| 54 |
-
outputs="
|
| 55 |
title="Insert Point Tagger",
|
|
|
|
| 56 |
description=(
|
| 57 |
"Paste a block of text and get '[INSERT_POINT_###]' tags added "
|
| 58 |
-
"1)
|
| 59 |
),
|
| 60 |
)
|
| 61 |
|
| 62 |
if __name__ == "__main__":
|
| 63 |
-
demo.launch()
|
|
|
|
| 1 |
import re
|
| 2 |
import gradio as gr
|
| 3 |
|
|
|
|
| 4 |
def insert_points(text):
|
| 5 |
+
# Initialize a counter accessible by the replacer function
|
|
|
|
|
|
|
| 6 |
counter = 1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
+
# This function will be called for each match found by re.sub
|
| 9 |
+
# It inserts the tag *before* the matched text (newline or hash sequence)
|
| 10 |
+
def replacer(match):
|
| 11 |
+
nonlocal counter # Use the counter from the outer scope
|
| 12 |
+
tag = f"[INSERT_POINT_{counter:03d}]"
|
| 13 |
+
counter += 1
|
| 14 |
+
# match.group(0) contains the actual matched string ('\n' or '##' etc.)
|
| 15 |
+
return tag + match.group(0)
|
| 16 |
+
|
| 17 |
+
# The pattern looks for either:
|
| 18 |
+
# 1) A newline character ('\n')
|
| 19 |
+
# 2) A sequence of one or more '#' characters ('\#+')
|
| 20 |
+
# '#' is escaped ('\#') only as a precaution: it is special solely in re.VERBOSE mode, so a plain '#' would match as well.
|
| 21 |
+
# The parentheses create a single capturing group; it is not strictly needed, because match.group(0) gives the whole match anyway.
|
| 22 |
+
pattern = r'(\n|\#+)'
|
| 23 |
+
|
| 24 |
+
# Use re.sub to find all matches of the pattern and replace them
|
| 25 |
+
# by calling the 'replacer' function for each match.
|
| 26 |
+
processed_text = re.sub(pattern, replacer, text)
|
| 27 |
+
|
| 28 |
+
# One edge case: If the *very beginning* of the text starts with '#',
|
| 29 |
+
# the regex above won't match anything *before* it.
|
| 30 |
+
# At first glance it seems we need to check whether the text starts with hashes (possibly after whitespace)
|
| 31 |
+
# and prepend the first tag if necessary.
|
| 32 |
+
# However, the current `re.sub(pattern, replacer, text)` already handles this
|
| 33 |
+
# correctly because it finds the '#' sequence at the beginning and the
|
| 34 |
+
# replacer adds the tag *before* it. Let's re-verify.
|
| 35 |
+
# Example: If text is "## Title", pattern finds "##" at index 0.
|
| 36 |
+
# Replacer runs, returns "[INSERT_POINT_001]##". Result is correct.
|
| 37 |
+
# So, no special handling for the beginning is needed with this pattern.
|
| 38 |
+
|
| 39 |
+
return processed_text
|
| 40 |
+
|
| 41 |
+
# --- Gradio Interface Code (same as your original except for `outputs` and `description`) ---
|
| 42 |
demo = gr.Interface(
|
| 43 |
fn=insert_points,
|
| 44 |
inputs=gr.Textbox(
|
|
|
|
| 46 |
placeholder="Paste your text here...",
|
| 47 |
label="Your Input Text"
|
| 48 |
),
|
| 49 |
+
outputs=gr.Textbox(label="Processed Text with Tags"), # Changed output type to Textbox for better viewing
|
| 50 |
title="Insert Point Tagger",
|
| 51 |
+
# Updated description for clarity
|
| 52 |
description=(
|
| 53 |
"Paste a block of text and get '[INSERT_POINT_###]' tags added "
|
| 54 |
+
"1) **before** every newline, and 2) **before** every '#' sequence (heading)."
|
| 55 |
),
|
| 56 |
)
|
| 57 |
|
| 58 |
if __name__ == "__main__":
|
| 59 |
+
demo.launch()
|