Bohaska committed on
Commit
aeded65
·
1 Parent(s): aab4729

Fix issue URLs

Browse files
issue_titles.json CHANGED
The diff for this file is too large to render. See raw diff
 
issue_titles_components.json CHANGED
The diff for this file is too large to render. See raw diff
 
small_scripts/make_embedding/embedding.py CHANGED
@@ -110,46 +110,60 @@ def _parse_issue_strict(issue_block: str, global_issue_index: int):
110
 
111
  return desc_text, option_lines
112
 
 
113
 
114
- import re
115
-
116
- def format_issue_title_markdown(issue_block):
117
  """
118
- Extracts anchor and visible title from the first line of the issue block,
119
- and formats as markdown with a forum link.
120
  """
121
- # Find the first non-empty line (should be the title line)
122
- for line in issue_block.splitlines():
123
- line = line.strip()
124
- if not line:
125
- continue
126
-
127
- # Extract anchor (e.g., [anchor=1379])
128
- anchor_match = re.search(r'\[anchor=(\d+)\]', line)
129
- anchor = anchor_match.group(1) if anchor_match else None
130
-
131
- # Extract visible title (after the closing [/anchor]:)
132
- # This matches: [anchor=1379]#1379[/anchor]: <title>
133
- title_match = re.search(r'\[anchor=(\d+)\]\#\d+\[\/anchor\]:\s*(.*)', line)
134
- if title_match:
135
- title_text = title_match.group(2).strip()
136
- else:
137
- # Fallback: try to find after the first colon
138
- parts = line.split(':', 1)
139
- title_text = parts[1].strip() if len(parts) > 1 else line
140
-
141
- # Remove trailing BBCode tags from title (but keep chain/fancy formatting)
142
- title_text = re.sub(r'\[\/?[^\]]+\]', '', title_text).strip()
143
-
144
- # Compose markdown
145
- if anchor:
146
- return f"#{anchor}: [{title_text}](https://forum.nationstates.net/viewtopic.php?f=13&t=88#{anchor})"
147
- else:
148
- # Fallback: just return cleaned title
149
- return title_text
150
 
151
- print(f"Could not find issue title in {issue_block}")
152
- raise ValueError(f"Parse error in issue title")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
 
154
  def encode_issues_components_and_sparse():
155
  print("Initializing BGEM3FlagModel...")
 
110
 
111
  return desc_text, option_lines
112
 
113
+ BASE = "https://forum.nationstates.net/viewtopic.php?f=13&t=88"
114
 
115
+ def compute_start_from_anchor(anchor: int) -> int:
 
 
116
  """
117
+ Returns the 'start' offset for the forum URL given an integer anchor (issue number).
118
+ start increases by 25 every 500 anchors, beginning at 420.
119
  """
120
+ # k is 0 for [0..419], 1 for [420..919], 2 for [920..1419], 3 for [1420..], etc.
121
+ anchor = int(anchor)
122
+ k = int(((anchor - 420) / 500) + 1)
123
+ if anchor < 420:
124
+ k = 0
125
+ if k < 0:
126
+ k = 0
127
+ return 25 * k
128
+
129
+ def craft_issue_url(anchor: int) -> str:
130
+ start = compute_start_from_anchor(anchor)
131
+ if start == 0:
132
+ return f"{BASE}#{anchor}"
133
+ return f"{BASE}&start={start}#{anchor}"
134
+
135
+ ANCHOR_RE = re.compile(r"\[anchor=(\d+)\]")
136
+
137
+ def extract_anchor(issue_title_line: str):
138
+ """
139
+ From a title like:
140
+ [b][anchor=1379]#1379[/anchor]: [color=#CE532A][i]MADness:[/i][/color] A View to a Thrill ...
141
+ returns the anchor digits as a string (e.g. "1379"), or None if not found.
142
+ """
143
+ m = ANCHOR_RE.search(issue_title_line)
144
+ return m.group(1) if m else None
 
 
 
 
145
 
146
+ def format_issue_title_markdown(issue_block: str) -> tuple[str, str]:
147
+ """
148
+ Returns (display_markdown, url) such as:
149
+ "#1379: [MADness: A View to a Thrill](...#1379)"
150
+ Keeps chain/fancy formatting in the visible title (BBCode stripped),
151
+ and builds the correct paginated URL using the anchor.
152
+ """
153
+ # First non-empty line should be the title line
154
+ title_line = next((ln.strip() for ln in issue_block.splitlines() if ln.strip()), "")
155
+ anchor = extract_anchor(title_line)
156
+ # Extract visible title to the right of '[/anchor]:'
157
+ # Example matches "...[/anchor]: <title text>"
158
+ title_part = title_line.split('[/anchor]:', 1)[-1].strip() if '[/anchor]:' in title_line else title_line
159
+ # Strip BBCode for display text while preserving the chain wording itself
160
+ title_text = re.sub(r"\[/?[^\]]+\]", "", title_part).strip()
161
+ if anchor is None:
162
+ # Fallback: no anchor found; return plain title
163
+ return (title_text or "Untitled Issue", f"{BASE}")
164
+ url = craft_issue_url(anchor)
165
+ display = f"#{anchor}: [{title_text}]({url})"
166
+ return display
167
 
168
  def encode_issues_components_and_sparse():
169
  print("Initializing BGEM3FlagModel...")