BtB-ExpC committed on
Commit
225d229
·
1 Parent(s): 8f04db2

new linear approach

Browse files
Files changed (1) hide show
  1. app.py +43 -29
app.py CHANGED
@@ -25,46 +25,60 @@ def insert_points(text):
25
  text = '\n' + text
26
 
27
  # -----------------------------
28
- # 2) TAG ALL HEADINGS
29
- # Now that headings are on their own lines, tag them
30
  # -----------------------------
31
- def heading_replacer(m):
32
- nonlocal counter
33
- heading = m.group(1) # The entire heading including hash symbols
34
- tag = f"[INSERT_POINT_{counter:03d}]\n"
35
- counter += 1
36
- return f"\n{tag}{heading}"
37
 
38
- # Look for any heading pattern (one or more # followed by space and text)
39
- text = re.sub(r'\n(#+\s+.+?)(?=\n|$)', heading_replacer, text)
 
40
 
41
- # -----------------------------
42
- # 3) TAG PARAGRAPH BREAKS
43
- # Add tags at meaningful paragraph breaks
44
- # -----------------------------
45
- def paragraph_replacer(m):
46
- nonlocal counter
47
- tag = f"[INSERT_POINT_{counter:03d}]\n"
48
- counter += 1
49
- return f"\n{tag}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
- # Find newlines that aren't already followed by a tag
52
- text = re.sub(r'\n(?!\[INSERT_POINT_)', paragraph_replacer, text)
53
 
54
  # -----------------------------
55
- # 4) ADD TAG AT THE BEGINNING IF NEEDED
56
  # -----------------------------
57
- if not text.startswith('\n[INSERT_POINT_'):
58
- text = f"\n[INSERT_POINT_{counter:03d}]\n" + text
59
  counter += 1
60
 
61
  # -----------------------------
62
- # 5) CLEAN UP: Remove excess newlines
63
  # -----------------------------
64
  # Remove extra blank lines before tags
65
  text = re.sub(r'\n\n+(\[INSERT_POINT_)', r'\n\1', text)
66
  # Remove extra blank lines at the beginning
67
  text = re.sub(r'^\n+', '', text)
 
 
68
 
69
  return text
70
 
@@ -83,10 +97,10 @@ demo = gr.Interface(
83
  title="Insert Point Tagger",
84
  description=(
85
  "This processor:\n"
86
- "1) Separates and tags headings (text starting with #).\n"
87
- "2) Tags paragraph breaks with sequential numbers.\n"
88
- "3) Places each tag on its own line.\n"
89
- "4) Works with heading patterns that may be embedded in running text."
90
  ),
91
  )
92
 
 
25
  text = '\n' + text
26
 
27
  # -----------------------------
28
+ # 2) SPLIT TEXT INTO SEGMENTS AND TAG
29
+ # Process the text in a single pass to avoid duplicate tags
30
  # -----------------------------
 
 
 
 
 
 
31
 
32
+ # Split the text into segments (paragraphs and headings)
33
+ segments = re.split(r'(\n#+\s+.+?(?=\n|$)|\n)', text)
34
+ segments = [s for s in segments if s] # Remove empty segments
35
 
36
+ result = []
37
+ previous_was_tag = False
38
+
39
+ for segment in segments:
40
+ # If this is a newline, add a tag (but not after another tag)
41
+ if segment == '\n':
42
+ if not previous_was_tag:
43
+ result.append(f"\n[INSERT_POINT_{counter:03d}]\n")
44
+ counter += 1
45
+ previous_was_tag = True
46
+ else:
47
+ result.append('\n') # Just add the newline without a tag
48
+
49
+ # If this is a heading, add a tag before it
50
+ elif segment.startswith('\n#'):
51
+ if not previous_was_tag:
52
+ result.append(f"\n[INSERT_POINT_{counter:03d}]")
53
+ counter += 1
54
+ previous_was_tag = True
55
+ result.append(segment)
56
+ previous_was_tag = False
57
+
58
+ # Regular text segment
59
+ else:
60
+ result.append(segment)
61
+ previous_was_tag = False
62
 
63
+ # Join all segments back together
64
+ text = ''.join(result)
65
 
66
  # -----------------------------
67
+ # 3) ADD TAG AT THE BEGINNING IF NEEDED
68
  # -----------------------------
69
+ if not text.startswith('[INSERT_POINT_'):
70
+ text = f"[INSERT_POINT_{counter:03d}]\n" + text
71
  counter += 1
72
 
73
  # -----------------------------
74
+ # 4) CLEAN UP: Remove excess newlines
75
  # -----------------------------
76
  # Remove extra blank lines before tags
77
  text = re.sub(r'\n\n+(\[INSERT_POINT_)', r'\n\1', text)
78
  # Remove extra blank lines at the beginning
79
  text = re.sub(r'^\n+', '', text)
80
+ # Ensure no consecutive tags
81
+ text = re.sub(r'(\[INSERT_POINT_\d{3}]\n)\s*\[INSERT_POINT_\d{3}]', r'\1', text)
82
 
83
  return text
84
 
 
97
  title="Insert Point Tagger",
98
  description=(
99
  "This processor:\n"
100
+ "1) Tags headings and paragraph breaks with sequential numbers.\n"
101
+ "2) Places each tag on its own line.\n"
102
+ "3) Ensures consistent, sequential numbering (001, 002, etc.).\n"
103
+ "4) Avoids consecutive tags - never two tags in a row."
104
  ),
105
  )
106