linguabot committed on
Commit
5e54402
·
verified ·
1 Parent(s): d6334fc

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. Dockerfile +6 -6
  2. routes/refinity.js +13 -164
  3. routes/tutorial-refinity.js +3 -45
Dockerfile CHANGED
@@ -1,5 +1,5 @@
1
- # Use Node.js 20 Alpine for closer parity with local (reduces HF vs local behavior drift)
2
- FROM node:20-alpine
3
 
4
  # Set working directory
5
  WORKDIR /app
@@ -7,9 +7,8 @@ WORKDIR /app
7
  # Copy package files
8
  COPY package*.json ./
9
 
10
- # Install dependencies deterministically from lockfile
11
- # (prevents HF vs local drift in docx/diff behavior)
12
- RUN npm ci --omit=dev
13
 
14
  # Copy server source code
15
  COPY . ./
@@ -30,6 +29,7 @@ HEALTHCHECK --interval=60s --timeout=10s --start-period=120s --retries=5 \
30
  CMD curl -f http://localhost:7860/api/health || exit 1
31
 
32
  # Start the application
 
33
  CMD npm start
34
 
35
- # rebuild trigger 2025-12-19T00:40:00Z
 
1
+ # Use Node.js 18 Alpine for smaller image size
2
+ FROM node:18-alpine
3
 
4
  # Set working directory
5
  WORKDIR /app
 
7
  # Copy package files
8
  COPY package*.json ./
9
 
10
+ # Install dependencies
11
+ RUN npm install --only=production
 
12
 
13
  # Copy server source code
14
  COPY . ./
 
29
  CMD curl -f http://localhost:7860/api/health || exit 1
30
 
31
  # Start the application
32
+ # rebuild trigger 2025-09-01T14:18:50Z
33
  CMD npm start
34
 
35
+ # rebuild trigger 2025-09-02T03:17:52Z
routes/refinity.js CHANGED
@@ -49,20 +49,6 @@ function safeDiffTokens(a, b) {
49
  }
50
  const { Document, Packer, Paragraph, TextRun, CommentRangeStart, CommentRangeEnd, CommentReference, Comments } = require('docx');
51
  const JSZip = require('jszip');
52
- const mongoose = require('mongoose');
53
-
54
- function safePkgVersion(name) {
55
- try {
56
- // Some packages (like docx) don't export package.json, so read from disk.
57
- const fs = require('fs');
58
- const path = require('path');
59
- const p = path.join(process.cwd(), 'node_modules', name, 'package.json');
60
- const j = JSON.parse(fs.readFileSync(p, 'utf8'));
61
- return j?.version || null;
62
- } catch {
63
- return null;
64
- }
65
- }
66
 
67
  const router = express.Router();
68
  const RefinityTask = require('../models/RefinityTask');
@@ -70,25 +56,6 @@ const RefinityVersion = require('../models/RefinityVersion');
70
  const upload = multer({ storage: multer.memoryStorage(), limits: { fileSize: 8 * 1024 * 1024 } });
71
  const RefinityAnnotation = require('../models/RefinityAnnotation');
72
 
73
- // Diagnostic endpoint (helps verify which backend build HF is serving)
74
- router.get('/test', (req, res) => {
75
- res.json({
76
- message: 'refinity route is working',
77
- compareCommentsImpl: 'tutorial-refinity-copied',
78
- build: {
79
- signature: 'toolkit-export-origin-slice-remap-v3',
80
- node: process.version,
81
- docx: safePkgVersion('docx'),
82
- diff: safePkgVersion('diff'),
83
- db: {
84
- name: mongoose?.connection?.name || null,
85
- state: mongoose?.connection?.readyState ?? null,
86
- },
87
- },
88
- timestamp: new Date().toISOString(),
89
- });
90
- });
91
-
92
  // ---- Helpers ----
93
  function encodeRFC5987ValueChars(str) {
94
  return encodeURIComponent(str)
@@ -299,9 +266,6 @@ router.post('/compare-comments-with-corrections', async (req, res) => {
299
  const annotationVersionId = req.body?.annotationVersionId;
300
  const authorName = String(req.body?.authorName || 'Refinity');
301
  const authorInitials = String(req.body?.authorInitials || (authorName.split(/\s+/).map(s=>s[0]||'').join('').slice(0,3).toUpperCase()) || 'RF');
302
- const requester = String(req.headers['x-user-name'] || req.headers['x-user-email'] || '').toLowerCase();
303
- const roleHdr = String(req.headers['x-user-role'] || req.headers['user-role'] || '').toLowerCase();
304
- const isAdmin = roleHdr === 'admin';
305
 
306
  if (!annotationVersionId) {
307
  return res.status(400).json({ error: 'annotationVersionId is required' });
@@ -314,23 +278,16 @@ router.post('/compare-comments-with-corrections', async (req, res) => {
314
  }
315
 
316
  // Find all versions in the same task that come after the older version
317
- // (kept for compatibility / potential future use)
318
  const laterVersions = await RefinityVersion.find({
319
  taskId: olderVersion.taskId,
320
  versionNumber: { $gt: olderVersion.versionNumber }
321
  }).sort({ versionNumber: 1 }).lean();
322
 
323
- // Match Tutorial DR compare-sidebar behavior:
324
- // include annotations from the older version AND all later versions in the same task.
325
- // This allows compare(v1, v4) to surface annotations added while revising v2/v3 as well.
326
  const versionIds = [annotationVersionId, ...laterVersions.map(v => v._id)];
327
- const annQuery = { versionId: { $in: versionIds } };
328
- // Toolkit DR is shared across users; avoid mixing other users' highlights/corrections in exports.
329
- // Non-admin: only export annotations created by the requester.
330
- if (requester && !isAdmin) {
331
- annQuery.createdBy = requester;
332
- }
333
- const allAnns = await RefinityAnnotation.find(annQuery).sort({ start: 1, end: 1 }).lean();
334
 
335
  // Build a map of annotations by their text content for matching
336
  // Key: normalized text content, Value: annotation with error type, correction, and positions
@@ -342,50 +299,7 @@ router.post('/compare-comments-with-corrections', async (req, res) => {
342
  : laterVersions.find(v => v._id.toString() === ann.versionId.toString());
343
  if (!version) continue;
344
 
345
- // Important safety + backward compatibility for toolkit exports:
346
- // Annotation offsets are relative to the version they were created on. Applying those
347
- // offsets directly to `prev` can cause wrong edits / duplicated body text.
348
- //
349
- // If the origin slice at [start,end) doesn't match `prev` at the same offsets, try to
350
- // remap by searching the origin slice text inside `prev` and anchoring to the closest
351
- // occurrence. If we can't remap, skip (better than inventing edits).
352
- const rawStart = Number(ann.start || 0);
353
- const rawEnd = Number(ann.end || 0);
354
- const originContent = String(version.content || '');
355
- const originSlice = originContent.slice(Math.max(0, rawStart), Math.max(0, rawEnd));
356
- let mappedStart = rawStart;
357
- let mappedEnd = rawEnd;
358
- let annText = prev.slice(Math.max(0, rawStart), Math.max(0, rawEnd));
359
-
360
- // If `annText` is empty (out-of-range) OR mismatched, try to remap by text search.
361
- if (originSlice && originSlice !== annText) {
362
- // Collect all occurrences (not just first) so we can pick the closest to rawStart.
363
- const occurrences = [];
364
- let fromIdx = 0;
365
- while (fromIdx <= prev.length) {
366
- const idx = prev.indexOf(originSlice, fromIdx);
367
- if (idx === -1) break;
368
- occurrences.push(idx);
369
- // Advance by 1 to allow overlapping matches (rare, but safer for short CJK spans).
370
- fromIdx = idx + 1;
371
- }
372
- if (!occurrences.length) {
373
- continue;
374
- }
375
- let bestIdx = occurrences[0];
376
- let bestDist = Math.abs(bestIdx - rawStart);
377
- for (const idx of occurrences) {
378
- const d = Math.abs(idx - rawStart);
379
- if (d < bestDist) {
380
- bestDist = d;
381
- bestIdx = idx;
382
- }
383
- }
384
- mappedStart = bestIdx;
385
- mappedEnd = bestIdx + originSlice.length;
386
- annText = prev.slice(mappedStart, mappedEnd);
387
- }
388
-
389
  if (annText) {
390
  const normalized = annText.trim().replace(/\s+/g, ' ');
391
  // Store the annotation with its error type and correction
@@ -394,16 +308,16 @@ router.post('/compare-comments-with-corrections', async (req, res) => {
394
  }
395
  annByText.get(normalized).push({
396
  category: ann.category || 'other',
397
- start: mappedStart,
398
- end: mappedEnd,
399
  correction: ann.correction || '',
400
  originalAnn: ann, // Store full annotation for reference
401
  });
402
 
403
  // Also store by position for range matching
404
  annByPosition.push({
405
- start: mappedStart,
406
- end: mappedEnd,
407
  category: ann.category || 'other',
408
  correction: ann.correction || '',
409
  selectedText: annText,
@@ -434,50 +348,6 @@ router.post('/compare-comments-with-corrections', async (req, res) => {
434
  return union.size > 0 ? intersection.size / union.size : 0;
435
  }
436
 
437
- // ANNOTATION-FIRST APPROACH (exactly like Tutorial DR):
438
- // Only process annotations from the database. Skip all diff-based processing.
439
- const USE_ANNOTATION_FIRST = true;
440
- let mergedAnnotations = [];
441
- if (USE_ANNOTATION_FIRST) {
442
- const annotationBasedItems = [];
443
- for (const ann of annByPosition) {
444
- const annSelectedText = ann.selectedText;
445
- const annCorrection = ann.correction || '';
446
- const annCategory = ann.category || 'other';
447
- const isDeletion = !annCorrection || annCorrection.trim() === '' || annCorrection.trim() === annSelectedText.trim();
448
- const isInsertion = annSelectedText.trim().length <= 2 && annCorrection.trim().length > annSelectedText.trim().length;
449
- annotationBasedItems.push({
450
- start: ann.start,
451
- end: ann.end,
452
- removedText: annSelectedText,
453
- replacementText: annCorrection,
454
- category: annCategory,
455
- isDeleted: isDeletion,
456
- isInsertion: isInsertion,
457
- isMove: false,
458
- });
459
- }
460
- annotationBasedItems.sort((a, b) => a.start - b.start || a.end - b.end);
461
- mergedAnnotations = (() => {
462
- const out = [];
463
- const seen = new Set();
464
- for (const it of annotationBasedItems) {
465
- const key = [
466
- it.start ?? '',
467
- it.end ?? '',
468
- String(it.category || ''),
469
- String(it.replacementText || ''),
470
- it.isDeleted ? '1' : '0',
471
- it.isInsertion ? '1' : '0',
472
- it.isMove ? '1' : '0',
473
- ].join('|');
474
- if (seen.has(key)) continue;
475
- seen.add(key);
476
- out.push(it);
477
- }
478
- return out;
479
- })();
480
- } else {
481
  // 1) Run diff to find ALL changes (like Show Diff does)
482
  const diffParts = safeDiffTokens(prev, current);
483
 
@@ -1192,31 +1062,7 @@ router.post('/compare-comments-with-corrections', async (req, res) => {
1192
  return finalAnnotations;
1193
  };
1194
 
1195
- mergedAnnotations = mergeDeletionInsertionPairs(diffBasedAnnotations);
1196
-
1197
- // De-dupe identical change items so sidebar-comment export doesn't duplicate spans/comments.
1198
- // This mirrors the tutorial DR fix and keeps behavior minimal/safe.
1199
- {
1200
- const out = [];
1201
- const seen = new Set();
1202
- for (const it of mergedAnnotations) {
1203
- const key = [
1204
- it.start ?? '',
1205
- it.end ?? '',
1206
- String(it.category || ''),
1207
- String(it.removedText || ''),
1208
- String(it.replacementText || ''),
1209
- it.isDeleted ? '1' : '0',
1210
- it.isInsertion ? '1' : '0',
1211
- it.isMove ? '1' : '0',
1212
- ].join('|');
1213
- if (seen.has(key)) continue;
1214
- seen.add(key);
1215
- out.push(it);
1216
- }
1217
- mergedAnnotations = out;
1218
- }
1219
- } // end diff-first fallback
1220
 
1221
  // 3) Split the older text into logical lines with their global offsets
1222
  const lines = [];
@@ -1423,14 +1269,17 @@ router.post('/compare-comments-with-corrections', async (req, res) => {
1423
 
1424
  let commentChildren;
1425
  if (item.isDeleted) {
 
1426
  commentChildren = [
1427
  new TextRun({ text: `[${categoryLabel}] ` }),
1428
  new TextRun({ text: original, strike: true }),
1429
  ];
1430
  } else if (item.isInsertion) {
 
1431
  const commentText = `[${categoryLabel}] → ${newer}`;
1432
  commentChildren = [ new TextRun({ text: commentText }) ];
1433
  } else {
 
1434
  const commentText = `[${categoryLabel}] ${original} → ${newer}`;
1435
  commentChildren = [ new TextRun({ text: commentText }) ];
1436
  }
 
49
  }
50
  const { Document, Packer, Paragraph, TextRun, CommentRangeStart, CommentRangeEnd, CommentReference, Comments } = require('docx');
51
  const JSZip = require('jszip');
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
 
53
  const router = express.Router();
54
  const RefinityTask = require('../models/RefinityTask');
 
56
  const upload = multer({ storage: multer.memoryStorage(), limits: { fileSize: 8 * 1024 * 1024 } });
57
  const RefinityAnnotation = require('../models/RefinityAnnotation');
58
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  // ---- Helpers ----
60
  function encodeRFC5987ValueChars(str) {
61
  return encodeURIComponent(str)
 
266
  const annotationVersionId = req.body?.annotationVersionId;
267
  const authorName = String(req.body?.authorName || 'Refinity');
268
  const authorInitials = String(req.body?.authorInitials || (authorName.split(/\s+/).map(s=>s[0]||'').join('').slice(0,3).toUpperCase()) || 'RF');
 
 
 
269
 
270
  if (!annotationVersionId) {
271
  return res.status(400).json({ error: 'annotationVersionId is required' });
 
278
  }
279
 
280
  // Find all versions in the same task that come after the older version
 
281
  const laterVersions = await RefinityVersion.find({
282
  taskId: olderVersion.taskId,
283
  versionNumber: { $gt: olderVersion.versionNumber }
284
  }).sort({ versionNumber: 1 }).lean();
285
 
286
+ // Get all annotations from the older version and all later versions
 
 
287
  const versionIds = [annotationVersionId, ...laterVersions.map(v => v._id)];
288
+ const allAnns = await RefinityAnnotation.find({
289
+ versionId: { $in: versionIds }
290
+ }).sort({ start: 1, end: 1 }).lean();
 
 
 
 
291
 
292
  // Build a map of annotations by their text content for matching
293
  // Key: normalized text content, Value: annotation with error type, correction, and positions
 
299
  : laterVersions.find(v => v._id.toString() === ann.versionId.toString());
300
  if (!version) continue;
301
 
302
+ const annText = prev.slice(ann.start || 0, ann.end || 0);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
303
  if (annText) {
304
  const normalized = annText.trim().replace(/\s+/g, ' ');
305
  // Store the annotation with its error type and correction
 
308
  }
309
  annByText.get(normalized).push({
310
  category: ann.category || 'other',
311
+ start: ann.start,
312
+ end: ann.end,
313
  correction: ann.correction || '',
314
  originalAnn: ann, // Store full annotation for reference
315
  });
316
 
317
  // Also store by position for range matching
318
  annByPosition.push({
319
+ start: ann.start || 0,
320
+ end: ann.end || 0,
321
  category: ann.category || 'other',
322
  correction: ann.correction || '',
323
  selectedText: annText,
 
348
  return union.size > 0 ? intersection.size / union.size : 0;
349
  }
350
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
351
  // 1) Run diff to find ALL changes (like Show Diff does)
352
  const diffParts = safeDiffTokens(prev, current);
353
 
 
1062
  return finalAnnotations;
1063
  };
1064
 
1065
+ const mergedAnnotations = mergeDeletionInsertionPairs(diffBasedAnnotations);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1066
 
1067
  // 3) Split the older text into logical lines with their global offsets
1068
  const lines = [];
 
1269
 
1270
  let commentChildren;
1271
  if (item.isDeleted) {
1272
+ // Deletion: show error type + original text with strikethrough
1273
  commentChildren = [
1274
  new TextRun({ text: `[${categoryLabel}] ` }),
1275
  new TextRun({ text: original, strike: true }),
1276
  ];
1277
  } else if (item.isInsertion) {
1278
+ // Insertion: show error type + → inserted text
1279
  const commentText = `[${categoryLabel}] → ${newer}`;
1280
  commentChildren = [ new TextRun({ text: commentText }) ];
1281
  } else {
1282
+ // Change: show error type + original → newer
1283
  const commentText = `[${categoryLabel}] ${original} → ${newer}`;
1284
  commentChildren = [ new TextRun({ text: commentText }) ];
1285
  }
routes/tutorial-refinity.js CHANGED
@@ -49,18 +49,6 @@ const TutorialRefinityVersion = require('../models/TutorialRefinityVersion');
49
  const TutorialRefinityAnnotation = require('../models/TutorialRefinityAnnotation');
50
  const upload = multer({ storage: multer.memoryStorage(), limits: { fileSize: 8 * 1024 * 1024 } });
51
 
52
- function safePkgVersion(name) {
53
- try {
54
- const fs = require('fs');
55
- const path = require('path');
56
- const p = path.join(process.cwd(), 'node_modules', name, 'package.json');
57
- const j = JSON.parse(fs.readFileSync(p, 'utf8'));
58
- return j?.version || null;
59
- } catch {
60
- return null;
61
- }
62
- }
63
-
64
  // Helper to set download headers
65
  function setDownloadHeaders(res, filename) {
66
  res.setHeader('Content-Type', 'application/vnd.openxmlformats-officedocument.wordprocessingml.document');
@@ -271,29 +259,8 @@ router.post('/compare-comments-with-corrections', async (req, res) => {
271
 
272
  // Sort by position
273
  annotationBasedItems.sort((a, b) => a.start - b.start || a.end - b.end);
274
-
275
- // De-dupe identical annotations (same span/category/correction/flags).
276
- // Without this, duplicated DB rows (or re-annotated identical spans) can cause the
277
- // generator to literally duplicate the span text in the document body.
278
- const mergedAnnotations = (() => {
279
- const out = [];
280
- const seen = new Set();
281
- for (const it of annotationBasedItems) {
282
- const key = [
283
- it.start ?? '',
284
- it.end ?? '',
285
- String(it.category || ''),
286
- String(it.replacementText || ''),
287
- it.isDeleted ? '1' : '0',
288
- it.isInsertion ? '1' : '0',
289
- it.isMove ? '1' : '0',
290
- ].join('|');
291
- if (seen.has(key)) continue;
292
- seen.add(key);
293
- out.push(it);
294
- }
295
- return out;
296
- })();
297
 
298
  const lines = [];
299
  let pos = 0;
@@ -588,16 +555,7 @@ router.post('/export-plain', async (req, res) => {
588
 
589
  // ----- Task APIs -----
590
  router.get('/test', (req, res) => {
591
- res.json({
592
- message: 'tutorial-refinity route is working',
593
- build: {
594
- signature: 'tutorial-refinity-export-v1',
595
- node: process.version,
596
- docx: safePkgVersion('docx'),
597
- diff: safePkgVersion('diff'),
598
- },
599
- timestamp: new Date().toISOString(),
600
- });
601
  });
602
 
603
  // Diagnostic endpoint to list all tasks (for debugging)
 
49
  const TutorialRefinityAnnotation = require('../models/TutorialRefinityAnnotation');
50
  const upload = multer({ storage: multer.memoryStorage(), limits: { fileSize: 8 * 1024 * 1024 } });
51
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  // Helper to set download headers
53
  function setDownloadHeaders(res, filename) {
54
  res.setHeader('Content-Type', 'application/vnd.openxmlformats-officedocument.wordprocessingml.document');
 
259
 
260
  // Sort by position
261
  annotationBasedItems.sort((a, b) => a.start - b.start || a.end - b.end);
262
+
263
+ const mergedAnnotations = annotationBasedItems;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
264
 
265
  const lines = [];
266
  let pos = 0;
 
555
 
556
  // ----- Task APIs -----
557
  router.get('/test', (req, res) => {
558
+ res.json({ message: 'tutorial-refinity route is working', timestamp: new Date().toISOString() });
 
 
 
 
 
 
 
 
 
559
  });
560
 
561
  // Diagnostic endpoint to list all tasks (for debugging)