Spaces:
Running
Running
Bohaska committed on
Commit ·
aeded65
1
Parent(s): aab4729
Fix issue URLs
Browse files- issue_titles.json +0 -0
- issue_titles_components.json +0 -0
- small_scripts/make_embedding/embedding.py +50 -36
issue_titles.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
issue_titles_components.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
small_scripts/make_embedding/embedding.py
CHANGED
|
@@ -110,46 +110,60 @@ def _parse_issue_strict(issue_block: str, global_issue_index: int):
|
|
| 110 |
|
| 111 |
return desc_text, option_lines
|
| 112 |
|
|
|
|
| 113 |
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
def format_issue_title_markdown(issue_block):
|
| 117 |
"""
|
| 118 |
-
|
| 119 |
-
|
| 120 |
"""
|
| 121 |
-
#
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
return f"#{anchor}: [{title_text}](https://forum.nationstates.net/viewtopic.php?f=13&t=88#{anchor})"
|
| 147 |
-
else:
|
| 148 |
-
# Fallback: just return cleaned title
|
| 149 |
-
return title_text
|
| 150 |
|
| 151 |
-
|
| 152 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 153 |
|
| 154 |
def encode_issues_components_and_sparse():
|
| 155 |
print("Initializing BGEM3FlagModel...")
|
|
|
|
| 110 |
|
| 111 |
return desc_text, option_lines
|
| 112 |
|
| 113 |
+
# Base URL of the forum topic that issue links point into. Callers in this
# file append an optional "&start=<offset>" page parameter and a "#<anchor>"
# fragment to it.
BASE = "https://forum.nationstates.net/viewtopic.php?f=13&t=88"
|
| 114 |
|
| 115 |
+
def compute_start_from_anchor(anchor: int) -> int:
    """Return the ``start`` page offset for the forum URL of an issue anchor.

    Anchors below 420 map to offset 0. From 420 onward the offset grows by 25
    for every block of 500 anchors:
    420..919 -> 25, 920..1419 -> 50, 1420..1919 -> 75, and so on.

    Args:
        anchor: Issue number. Anything ``int()`` accepts is allowed, e.g. a
            digit string such as ``"1379"``.

    Returns:
        The offset, always a non-negative multiple of 25.

    Raises:
        ValueError: If ``anchor`` cannot be converted by ``int()``.
    """
    anchor = int(anchor)
    if anchor < 420:
        return 0
    # Floor division keeps the arithmetic exact (no float round-trip); the
    # block index is at least 1 here, so no further clamping is needed.
    return 25 * ((anchor - 420) // 500 + 1)
|
| 128 |
+
|
| 129 |
+
def craft_issue_url(anchor: int) -> str:
    """Build the forum URL that jumps to the post for issue ``anchor``.

    The page offset comes from ``compute_start_from_anchor``. When that
    offset is 0 the ``&start=`` parameter is left out entirely; the
    ``#<anchor>`` fragment is always appended.
    """
    offset = compute_start_from_anchor(anchor)
    page_query = f"&start={offset}" if offset != 0 else ""
    return f"{BASE}{page_query}#{anchor}"
|
| 134 |
+
|
| 135 |
+
# Matches the BBCode anchor tag of an issue title and captures its number.
ANCHOR_RE = re.compile(r"\[anchor=(\d+)\]")


def extract_anchor(issue_title_line: str):
    """Extract the issue number from a BBCode title line.

    From a title like:
        [b][anchor=1379]#1379[/anchor]: [color=#CE532A][i]MADness:[/i][/color] A View to a Thrill ...
    returns 1379.

    Args:
        issue_title_line: The title line of an issue block.

    Returns:
        The number inside the first ``[anchor=N]`` tag as an ``int``, or
        ``None`` when the line contains no such tag.
    """
    m = ANCHOR_RE.search(issue_title_line)
    # Convert the captured digits so the result matches the documented
    # int contract instead of leaking the raw regex group string.
    return int(m.group(1)) if m else None
|
|
|
|
|
|
|
|
|
|
|
|
|
| 145 |
|
| 146 |
+
def format_issue_title_markdown(issue_block: str) -> str:
    """Build the Markdown display line for an issue block.

    The first non-empty line of ``issue_block`` is treated as the title line.
    Its visible title is the text to the right of ``[/anchor]:`` (or the whole
    line when that marker is absent) with BBCode tags stripped, so chain
    prefixes such as "MADness:" stay in the displayed title.

    Args:
        issue_block: Raw BBCode text of one issue.

    Returns:
        ``"#<anchor>: [<title>](<url>)"`` when the title line carries an
        ``[anchor=N]`` tag, where the URL is built by ``craft_issue_url``.
        Otherwise the plain title text, or ``"Untitled Issue"`` when that
        text is empty. Both paths return a single string.
    """
    # First non-empty line should be the title line.
    title_line = next((ln.strip() for ln in issue_block.splitlines() if ln.strip()), "")
    anchor = extract_anchor(title_line)

    # Keep only what follows "[/anchor]:"; fall back to the full line when the
    # marker is missing.
    _, marker, after_marker = title_line.partition("[/anchor]:")
    title_part = after_marker.strip() if marker else title_line

    # Strip BBCode tags for display while preserving the wording itself.
    title_text = re.sub(r"\[/?[^\]]+\]", "", title_part).strip()

    if anchor is None:
        # No anchor means no URL can be built. Return the plain title as a
        # string so the return type matches the anchored path.
        return title_text or "Untitled Issue"

    return f"#{anchor}: [{title_text}]({craft_issue_url(anchor)})"
|
| 167 |
|
| 168 |
def encode_issues_components_and_sparse():
|
| 169 |
print("Initializing BGEM3FlagModel...")
|