BtB-ExpC committed on
Commit
96bc994
·
1 Parent(s): b9a12d4

new approach

Browse files
Files changed (1) hide show
  1. app.py +39 -43
app.py CHANGED
@@ -1,49 +1,44 @@
1
  import re
2
  import gradio as gr
3
 
4
-
5
  def insert_points(text):
6
- # Split the input by newline to handle multi-line text
7
- lines = text.split('\n')
8
-
9
  counter = 1
10
- output = []
11
-
12
- for line in lines:
13
- # This pattern captures sequences of '#' (1 or more) plus optional trailing spaces
14
- # We'll split the line on these sequences, but keep them so we can re-insert with tags.
15
- segments = re.split(r'(\#+\s*)', line)
16
-
17
- # We'll rebuild this line piece by piece
18
- rebuilt_line = []
19
-
20
- for seg in segments:
21
- if re.match(r'^\#+\s*$', seg):
22
- # This segment is a run of '#' (with optional trailing space)
23
- # Insert a tag BEFORE it
24
- tag = f"[INSERT_POINT_{counter:03d}]"
25
- counter += 1
26
- rebuilt_line.append(tag)
27
-
28
- # Then append the actual '#' segment
29
- rebuilt_line.append(seg)
30
- else:
31
- # Normal text
32
- rebuilt_line.append(seg)
33
-
34
- # After processing all segments in this line,
35
- # append a final tag to match your "insert a tag after every line" requirement
36
- line_tag = f"[INSERT_POINT_{counter:03d}]"
37
- counter += 1
38
- rebuilt_line.append(line_tag)
39
-
40
- # Join the processed segments for this line
41
- output.append("".join(rebuilt_line))
42
-
43
- # Finally, join all lines with newlines
44
- return "\n".join(output)
45
-
46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  demo = gr.Interface(
48
  fn=insert_points,
49
  inputs=gr.Textbox(
@@ -51,13 +46,14 @@ demo = gr.Interface(
51
  placeholder="Paste your text here...",
52
  label="Your Input Text"
53
  ),
54
- outputs="text",
55
  title="Insert Point Tagger",
 
56
  description=(
57
  "Paste a block of text and get '[INSERT_POINT_###]' tags added "
58
- "1) after every newline, and 2) before every '#' sequence."
59
  ),
60
  )
61
 
62
  if __name__ == "__main__":
63
- demo.launch()
 
1
  import re
2
  import gradio as gr
3
 
 
# Matches one insertion target: a line break (LF, or CRLF treated as a single
# unit) or a run of one or more '#' characters. '#' is an ordinary character
# in a regex unless re.VERBOSE is set, so it needs no escaping here.
_INSERT_TARGET = re.compile(r"\r?\n|#+")


def insert_points(text):
    """Insert numbered ``[INSERT_POINT_###]`` tags into *text*.

    A tag is placed immediately *before* every line break and immediately
    *before* every run of ``#`` characters. Tags are numbered from 001 in
    order of appearance; the number is zero-padded to at least three digits.

    Args:
        text: The text to tag. ``None`` or an empty string yields ``""``.

    Returns:
        The tagged text. Text containing no line break and no ``#`` is
        returned unchanged.
    """
    if not text:
        # Nothing to tag; also guards against None, which the regex
        # functions would reject with a TypeError.
        return ""

    pieces = []
    copied_up_to = 0
    for number, match in enumerate(_INSERT_TARGET.finditer(text), start=1):
        # Copy the untouched text preceding this match, then the tag. The
        # matched text itself is left in place and copied with the next slice,
        # so the tag always ends up directly in front of it.
        pieces.append(text[copied_up_to:match.start()])
        pieces.append(f"[INSERT_POINT_{number:03d}]")
        copied_up_to = match.start()
    pieces.append(text[copied_up_to:])

    return "".join(pieces)
40
+
41
+ # --- Gradio Interface Code (Unchanged from your original) ---
42
  demo = gr.Interface(
43
  fn=insert_points,
44
  inputs=gr.Textbox(
 
46
  placeholder="Paste your text here...",
47
  label="Your Input Text"
48
  ),
49
+ outputs=gr.Textbox(label="Processed Text with Tags"), # Changed output type to Textbox for better viewing
50
  title="Insert Point Tagger",
51
+ # Updated description for clarity
52
  description=(
53
  "Paste a block of text and get '[INSERT_POINT_###]' tags added "
54
+ "1) **before** every newline, and 2) **before** every '#' sequence (heading)."
55
  ),
56
  )
57
 
58
  if __name__ == "__main__":
59
+ demo.launch()