BtB-ExpC committed on
Commit
d4f7222
·
1 Parent(s): 73a9e74

try new approach

Browse files
Files changed (1) hide show
  1. app.py +56 -42
app.py CHANGED
@@ -1,61 +1,75 @@
1
  import re
2
  import gradio as gr
3
 
4
- def insert_points(text):
5
- # --- Step 0: Normalize line endings ---
6
- # Replace Windows (\r\n) and old Mac (\r) line endings with Unix (\n)
7
- # This simplifies the regex patterns below.
8
- text = text.replace('\r\n', '\n').replace('\r', '\n')
9
 
 
10
  counter = 1
11
 
12
- # Replacer function for the initial insertion pass
13
- def replacer(match):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  nonlocal counter
15
- tag = f"[INSERT_POINT_{counter:03d}]"
 
 
16
  counter += 1
17
- # Return the tag followed by the original matched text
18
- # group(0) will be one or more newlines OR one or more hashes
19
- return tag + match.group(0)
20
-
21
- # --- Step 1: Insert tags before hash sequences and consolidated newline sequences ---
22
- # Pattern matches:
23
- # 1) '\n+' : One or more consecutive (now normalized) newline characters. Collapses blank lines.
24
- # 2) '\#+' : One or more consecutive '#' characters.
25
- pattern_initial = r'(\n+|\#+)'
26
- processed_text = re.sub(pattern_initial, replacer, text)
27
-
28
- # --- Step 2: Clean up potential heading splits ---
29
- # This fixes cases where Step 1 resulted in "[TAG_A]\n[TAG_B]###"
30
- # It looks for TAG_A, followed by whitespace (\s* includes the \n),
31
- # followed by TAG_B, followed by hashes (#+).
32
- cleanup_pattern = r'(\[INSERT_POINT_\d{3}\])\s*(\[INSERT_POINT_\d{3}\])(\#+)'
33
- # Replaces the whole match with just TAG_B (\2) and the hashes (\3).
34
- processed_text = re.sub(cleanup_pattern, r'\2\3', processed_text)
35
-
36
- return processed_text
37
-
38
- # --- Gradio Interface Code (with Copy Button) ---
39
  demo = gr.Interface(
40
  fn=insert_points,
41
  inputs=gr.Textbox(
42
  lines=10,
43
- placeholder="Paste your text here...\n(Line endings will be normalized)",
44
  label="Your Input Text"
45
  ),
46
- outputs=gr.Textbox(
47
- label="Processed Text with Tags",
48
- show_copy_button=True # <--- Added copy button here
49
- ),
50
- title="Insert Point Tagger v3", # Optional: update title
51
  description=(
52
- "Paste text to add '[INSERT_POINT_###]' tags:\n"
53
- "1) **Before** each '#' sequence (headings).\n"
54
- "2) **Before** each sequence of one or more newlines (one tag per line break/blank line).\n"
55
- "Normalizes line endings and cleans up heading tag spacing."
 
56
  ),
57
- allow_flagging='never'
58
  )
59
 
60
  if __name__ == "__main__":
61
- demo.launch()
 
1
  import re
2
  import gradio as gr
3
 
 
 
 
 
 
4
 
5
def insert_points(text):
    """Insert numbered ``[INSERT_POINT_NNN]`` tags at line breaks and headings.

    Steps:
      1. Normalize CRLF / lone CR line endings to LF, so a tag is never
         wedged between the two halves of a CRLF pair.
      2. Collapse runs of blank (or whitespace-only) lines into a single
         newline.
      3. In ONE left-to-right pass, insert a tag
           - before every remaining newline, and
           - before every run of '#' characters, unless that run directly
             follows a newline (the newline's tag already covers it) or
             directly follows an existing 3-digit tag.

    Tags are numbered from 001 in document order.

    Args:
        text: The raw input text. ``None`` or an empty string yields ``""``.

    Returns:
        The processed text with tags inserted (line endings are LF).
    """
    if not text:
        return ""

    # 1) Normalize line endings so only '\n' has to be handled below.
    text = text.replace('\r\n', '\n').replace('\r', '\n')

    # 2) Collapse 2+ newlines (optionally separated by whitespace) into one.
    text = re.sub(r'\n\s*\n+', '\n', text)

    counter = 1

    def tag_replacer(match):
        # Prefix the matched newline / hash run with the next tag.
        nonlocal counter
        tagged = f"[INSERT_POINT_{counter:03d}]{match.group(0)}"
        counter += 1
        return tagged

    # 3) Newlines and hash runs are handled by one pattern so numbering
    #    follows document order.
    #    (?<![#\n])  -> only match at the START of a hash run, and never for
    #                   a run that begins a line after a newline. Without the
    #                   '#' guard the scan would restart inside "##" and split
    #                   the heading in two.
    #    (?<!\[INSERT_POINT_\d{3}\]) -> skip runs that are already tagged.
    pattern = r'\n|(?<![#\n])(?<!\[INSERT_POINT_\d{3}\])#+'
    return re.sub(pattern, tag_replacer, text)
51
+
52
+
53
# -----------------------------
# GRADIO UI WIRING
# -----------------------------
def _build_interface():
    """Assemble the Gradio UI that exposes ``insert_points``.

    One multi-line textbox feeds the function; one textbox shows the result.
    """
    source_box = gr.Textbox(
        lines=10,
        placeholder="Paste your text here...",
        label="Your Input Text"
    )
    tagged_box = gr.Textbox(label="Processed Text with Tags")

    # User-facing summary of the three processing steps.
    blurb = (
        "1) Collapses multiple blank lines into one.\n"
        "2) Inserts a single [INSERT_POINT_XXX] tag before each remaining newline.\n"
        "3) Inserts a single [INSERT_POINT_XXX] tag before any '#' heading, unless\n"
        " there's already a tag from the preceding newline.\n"
        "This avoids double tags while keeping heading text on the same line."
    )

    return gr.Interface(
        fn=insert_points,
        inputs=source_box,
        outputs=tagged_box,
        title="Insert Point Tagger",
        description=blurb,
    )


# Module-level handle kept under the name `demo`.
demo = _build_interface()

if __name__ == "__main__":
    demo.launch()