Spaces:
Sleeping
Sleeping
Commit ·
bd79ab1
1
Parent(s): ca6c843
fix: strip markdown from rawText before indexOf for annotation position
Browse files
Selected text from the browser is plain, but rawText has markdown formatting
(**bold**, # headings, etc.), causing indexOf to fail and return null
start/end. Now strips markdown before searching, with a fallback to
case-insensitive search.
- app/page.js +35 -6
app/page.js
CHANGED
|
@@ -145,31 +145,60 @@ export default function Home() {
|
|
| 145 |
};
|
| 146 |
|
| 147 |
const handleAnnotationSubmit = async ({ dataset_tag }) => {
|
| 148 |
-
// Find ALL occurrences of the selected text in input_text
|
| 149 |
const inputText = currentPageData?.input_text || "";
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 150 |
const occurrences = [];
|
| 151 |
let searchFrom = 0;
|
| 152 |
-
while (searchFrom <
|
| 153 |
-
const idx =
|
| 154 |
if (idx === -1) break;
|
| 155 |
occurrences.push(idx);
|
| 156 |
searchFrom = idx + 1;
|
| 157 |
}
|
| 158 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 159 |
let startIdx = null;
|
| 160 |
if (occurrences.length === 1) {
|
| 161 |
startIdx = occurrences[0];
|
| 162 |
} else if (occurrences.length > 1) {
|
| 163 |
const container = document.querySelector('.markdown-preview');
|
| 164 |
-
const visibleLen = container?.textContent?.length ||
|
| 165 |
-
const ratio =
|
| 166 |
const estimatedSourcePos = selectionOffset * ratio;
|
| 167 |
startIdx = occurrences.reduce((best, idx) =>
|
| 168 |
Math.abs(idx - estimatedSourcePos) < Math.abs(best - estimatedSourcePos) ? idx : best
|
| 169 |
);
|
| 170 |
}
|
| 171 |
|
| 172 |
-
const endIdx = startIdx !== null ? startIdx +
|
| 173 |
|
| 174 |
const payload = {
|
| 175 |
dataset_name: {
|
|
|
|
| 145 |
};
|
| 146 |
|
| 147 |
const handleAnnotationSubmit = async ({ dataset_tag }) => {
|
|
|
|
| 148 |
const inputText = currentPageData?.input_text || "";
|
| 149 |
+
|
| 150 |
+
// Strip markdown formatting so browser-selected plain text can be found
|
| 151 |
+
const stripMd = (s) => s
|
| 152 |
+
.replace(/\*\*\*/g, '') // bold-italic
|
| 153 |
+
.replace(/\*\*/g, '') // bold
|
| 154 |
+
.replace(/\*/g, '') // italic
|
| 155 |
+
.replace(/__/g, '')
|
| 156 |
+
.replace(/_/g, ' ')
|
| 157 |
+
.replace(/^#{1,6}\s+/gm, '') // headings
|
| 158 |
+
.replace(/\[([^\]]+)\]\([^)]+\)/g, '$1'); // links
|
| 159 |
+
|
| 160 |
+
const plainText = stripMd(inputText);
|
| 161 |
+
|
| 162 |
+
// Normalize whitespace in selected text (browser may add extra spaces/newlines)
|
| 163 |
+
const normalizedSelection = selectedText.replace(/\s+/g, ' ').trim();
|
| 164 |
+
|
| 165 |
+
// Find occurrences in the stripped text
|
| 166 |
const occurrences = [];
|
| 167 |
let searchFrom = 0;
|
| 168 |
+
while (searchFrom < plainText.length) {
|
| 169 |
+
const idx = plainText.indexOf(normalizedSelection, searchFrom);
|
| 170 |
if (idx === -1) break;
|
| 171 |
occurrences.push(idx);
|
| 172 |
searchFrom = idx + 1;
|
| 173 |
}
|
| 174 |
|
| 175 |
+
// If not found with exact match, try case-insensitive
|
| 176 |
+
if (occurrences.length === 0) {
|
| 177 |
+
const lowerPlain = plainText.toLowerCase();
|
| 178 |
+
const lowerSel = normalizedSelection.toLowerCase();
|
| 179 |
+
let sf = 0;
|
| 180 |
+
while (sf < lowerPlain.length) {
|
| 181 |
+
const idx = lowerPlain.indexOf(lowerSel, sf);
|
| 182 |
+
if (idx === -1) break;
|
| 183 |
+
occurrences.push(idx);
|
| 184 |
+
sf = idx + 1;
|
| 185 |
+
}
|
| 186 |
+
}
|
| 187 |
+
|
| 188 |
let startIdx = null;
|
| 189 |
if (occurrences.length === 1) {
|
| 190 |
startIdx = occurrences[0];
|
| 191 |
} else if (occurrences.length > 1) {
|
| 192 |
const container = document.querySelector('.markdown-preview');
|
| 193 |
+
const visibleLen = container?.textContent?.length || plainText.length;
|
| 194 |
+
const ratio = plainText.length / visibleLen;
|
| 195 |
const estimatedSourcePos = selectionOffset * ratio;
|
| 196 |
startIdx = occurrences.reduce((best, idx) =>
|
| 197 |
Math.abs(idx - estimatedSourcePos) < Math.abs(best - estimatedSourcePos) ? idx : best
|
| 198 |
);
|
| 199 |
}
|
| 200 |
|
| 201 |
+
const endIdx = startIdx !== null ? startIdx + normalizedSelection.length : null;
|
| 202 |
|
| 203 |
const payload = {
|
| 204 |
dataset_name: {
|