Alleinzellgaenger committed on
Commit
9e83da7
·
1 Parent(s): e79fdda

Highlighting now works perfectly!

Browse files
backend/app.py CHANGED
@@ -296,7 +296,15 @@ Return a list of chunks with topic, start_phrase, and end_phrase for each."""
296
  positioned_chunks = []
297
  for chunk in chunks:
298
  start_pos = fuzzy_find(page_markdown, chunk.start_phrase)
299
- end_pos = fuzzy_find(page_markdown, chunk.end_phrase, start_pos or 0)
 
 
 
 
 
 
 
 
300
 
301
  if start_pos is not None:
302
  positioned_chunks.append({
@@ -367,7 +375,15 @@ Return a list of chunks with topic, start_phrase, and end_phrase for each."""
367
  positioned_chunks = []
368
  for chunk in chunks:
369
  start_pos = fuzzy_find(page_markdown, chunk.start_phrase)
370
- end_pos = fuzzy_find(page_markdown, chunk.end_phrase, start_pos or 0)
 
 
 
 
 
 
 
 
371
 
372
  if start_pos is not None:
373
  positioned_chunks.append({
 
296
  positioned_chunks = []
297
  for chunk in chunks:
298
  start_pos = fuzzy_find(page_markdown, chunk.start_phrase)
299
+ end_phrase_start = fuzzy_find(page_markdown, chunk.end_phrase, start_pos or 0)
300
+ # Add the length of the end_phrase plus a bit more to include punctuation
301
+ if end_phrase_start is not None:
302
+ end_pos = end_phrase_start + len(chunk.end_phrase)
303
+ # Try to include punctuation that might follow
304
+ if end_pos < len(page_markdown) and page_markdown[end_pos] in '.!?;:,':
305
+ end_pos += 1
306
+ else:
307
+ end_pos = None
308
 
309
  if start_pos is not None:
310
  positioned_chunks.append({
 
375
  positioned_chunks = []
376
  for chunk in chunks:
377
  start_pos = fuzzy_find(page_markdown, chunk.start_phrase)
378
+ end_phrase_start = fuzzy_find(page_markdown, chunk.end_phrase, start_pos or 0)
379
+ # Add the length of the end_phrase plus a bit more to include punctuation
380
+ if end_phrase_start is not None:
381
+ end_pos = end_phrase_start + len(chunk.end_phrase)
382
+ # Try to include punctuation that might follow
383
+ if end_pos < len(page_markdown) and page_markdown[end_pos] in '.!?;:,':
384
+ end_pos += 1
385
+ else:
386
+ end_pos = None
387
 
388
  if start_pos is not None:
389
  positioned_chunks.append({
frontend/package-lock.json CHANGED
@@ -19,6 +19,7 @@
19
  "react-pdf": "^10.0.1",
20
  "react-router-dom": "^7.7.0",
21
  "rehype-katex": "^7.0.1",
 
22
  "remark-math": "^6.0.0",
23
  "tailwindcss": "^4.1.11"
24
  },
@@ -2918,6 +2919,31 @@
2918
  "url": "https://opencollective.com/unified"
2919
  }
2920
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2921
  "node_modules/hast-util-to-jsx-runtime": {
2922
  "version": "2.3.6",
2923
  "resolved": "https://registry.npmjs.org/hast-util-to-jsx-runtime/-/hast-util-to-jsx-runtime-2.3.6.tgz",
@@ -2945,6 +2971,35 @@
2945
  "url": "https://opencollective.com/unified"
2946
  }
2947
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2948
  "node_modules/hast-util-to-text": {
2949
  "version": "4.0.2",
2950
  "resolved": "https://registry.npmjs.org/hast-util-to-text/-/hast-util-to-text-4.0.2.tgz",
@@ -3001,6 +3056,16 @@
3001
  "url": "https://opencollective.com/unified"
3002
  }
3003
  },
 
 
 
 
 
 
 
 
 
 
3004
  "node_modules/ignore": {
3005
  "version": "5.3.2",
3006
  "resolved": "https://registry.npmjs.org/ignore/-/ignore-5.3.2.tgz",
@@ -4705,6 +4770,21 @@
4705
  "url": "https://opencollective.com/unified"
4706
  }
4707
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4708
  "node_modules/remark-math": {
4709
  "version": "6.0.0",
4710
  "resolved": "https://registry.npmjs.org/remark-math/-/remark-math-6.0.0.tgz",
 
19
  "react-pdf": "^10.0.1",
20
  "react-router-dom": "^7.7.0",
21
  "rehype-katex": "^7.0.1",
22
+ "rehype-raw": "^7.0.0",
23
  "remark-math": "^6.0.0",
24
  "tailwindcss": "^4.1.11"
25
  },
 
2919
  "url": "https://opencollective.com/unified"
2920
  }
2921
  },
2922
+ "node_modules/hast-util-raw": {
2923
+ "version": "9.1.0",
2924
+ "resolved": "https://registry.npmjs.org/hast-util-raw/-/hast-util-raw-9.1.0.tgz",
2925
+ "integrity": "sha512-Y8/SBAHkZGoNkpzqqfCldijcuUKh7/su31kEBp67cFY09Wy0mTRgtsLYsiIxMJxlu0f6AA5SUTbDR8K0rxnbUw==",
2926
+ "license": "MIT",
2927
+ "dependencies": {
2928
+ "@types/hast": "^3.0.0",
2929
+ "@types/unist": "^3.0.0",
2930
+ "@ungap/structured-clone": "^1.0.0",
2931
+ "hast-util-from-parse5": "^8.0.0",
2932
+ "hast-util-to-parse5": "^8.0.0",
2933
+ "html-void-elements": "^3.0.0",
2934
+ "mdast-util-to-hast": "^13.0.0",
2935
+ "parse5": "^7.0.0",
2936
+ "unist-util-position": "^5.0.0",
2937
+ "unist-util-visit": "^5.0.0",
2938
+ "vfile": "^6.0.0",
2939
+ "web-namespaces": "^2.0.0",
2940
+ "zwitch": "^2.0.0"
2941
+ },
2942
+ "funding": {
2943
+ "type": "opencollective",
2944
+ "url": "https://opencollective.com/unified"
2945
+ }
2946
+ },
2947
  "node_modules/hast-util-to-jsx-runtime": {
2948
  "version": "2.3.6",
2949
  "resolved": "https://registry.npmjs.org/hast-util-to-jsx-runtime/-/hast-util-to-jsx-runtime-2.3.6.tgz",
 
2971
  "url": "https://opencollective.com/unified"
2972
  }
2973
  },
2974
+ "node_modules/hast-util-to-parse5": {
2975
+ "version": "8.0.0",
2976
+ "resolved": "https://registry.npmjs.org/hast-util-to-parse5/-/hast-util-to-parse5-8.0.0.tgz",
2977
+ "integrity": "sha512-3KKrV5ZVI8if87DVSi1vDeByYrkGzg4mEfeu4alwgmmIeARiBLKCZS2uw5Gb6nU9x9Yufyj3iudm6i7nl52PFw==",
2978
+ "license": "MIT",
2979
+ "dependencies": {
2980
+ "@types/hast": "^3.0.0",
2981
+ "comma-separated-tokens": "^2.0.0",
2982
+ "devlop": "^1.0.0",
2983
+ "property-information": "^6.0.0",
2984
+ "space-separated-tokens": "^2.0.0",
2985
+ "web-namespaces": "^2.0.0",
2986
+ "zwitch": "^2.0.0"
2987
+ },
2988
+ "funding": {
2989
+ "type": "opencollective",
2990
+ "url": "https://opencollective.com/unified"
2991
+ }
2992
+ },
2993
+ "node_modules/hast-util-to-parse5/node_modules/property-information": {
2994
+ "version": "6.5.0",
2995
+ "resolved": "https://registry.npmjs.org/property-information/-/property-information-6.5.0.tgz",
2996
+ "integrity": "sha512-PgTgs/BlvHxOu8QuEN7wi5A0OmXaBcHpmCSTehcs6Uuu9IkDIEo13Hy7n898RHfrQ49vKCoGeWZSaAK01nwVig==",
2997
+ "license": "MIT",
2998
+ "funding": {
2999
+ "type": "github",
3000
+ "url": "https://github.com/sponsors/wooorm"
3001
+ }
3002
+ },
3003
  "node_modules/hast-util-to-text": {
3004
  "version": "4.0.2",
3005
  "resolved": "https://registry.npmjs.org/hast-util-to-text/-/hast-util-to-text-4.0.2.tgz",
 
3056
  "url": "https://opencollective.com/unified"
3057
  }
3058
  },
3059
+ "node_modules/html-void-elements": {
3060
+ "version": "3.0.0",
3061
+ "resolved": "https://registry.npmjs.org/html-void-elements/-/html-void-elements-3.0.0.tgz",
3062
+ "integrity": "sha512-bEqo66MRXsUGxWHV5IP0PUiAWwoEjba4VCzg0LjFJBpchPaTfyfCKTG6bc5F8ucKec3q5y6qOdGyYTSBEvhCrg==",
3063
+ "license": "MIT",
3064
+ "funding": {
3065
+ "type": "github",
3066
+ "url": "https://github.com/sponsors/wooorm"
3067
+ }
3068
+ },
3069
  "node_modules/ignore": {
3070
  "version": "5.3.2",
3071
  "resolved": "https://registry.npmjs.org/ignore/-/ignore-5.3.2.tgz",
 
4770
  "url": "https://opencollective.com/unified"
4771
  }
4772
  },
4773
+ "node_modules/rehype-raw": {
4774
+ "version": "7.0.0",
4775
+ "resolved": "https://registry.npmjs.org/rehype-raw/-/rehype-raw-7.0.0.tgz",
4776
+ "integrity": "sha512-/aE8hCfKlQeA8LmyeyQvQF3eBiLRGNlfBJEvWH7ivp9sBqs7TNqBL5X3v157rM4IFETqDnIOO+z5M/biZbo9Ww==",
4777
+ "license": "MIT",
4778
+ "dependencies": {
4779
+ "@types/hast": "^3.0.0",
4780
+ "hast-util-raw": "^9.0.0",
4781
+ "vfile": "^6.0.0"
4782
+ },
4783
+ "funding": {
4784
+ "type": "opencollective",
4785
+ "url": "https://opencollective.com/unified"
4786
+ }
4787
+ },
4788
  "node_modules/remark-math": {
4789
  "version": "6.0.0",
4790
  "resolved": "https://registry.npmjs.org/remark-math/-/remark-math-6.0.0.tgz",
frontend/package.json CHANGED
@@ -21,6 +21,7 @@
21
  "react-pdf": "^10.0.1",
22
  "react-router-dom": "^7.7.0",
23
  "rehype-katex": "^7.0.1",
 
24
  "remark-math": "^6.0.0",
25
  "tailwindcss": "^4.1.11"
26
  },
 
21
  "react-pdf": "^10.0.1",
22
  "react-router-dom": "^7.7.0",
23
  "rehype-katex": "^7.0.1",
24
+ "rehype-raw": "^7.0.0",
25
  "remark-math": "^6.0.0",
26
  "tailwindcss": "^4.1.11"
27
  },
frontend/src/components/DocumentProcessor.jsx CHANGED
@@ -1,11 +1,38 @@
1
- import { useState, useRef, useEffect } from 'react';
2
  import ReactMarkdown from 'react-markdown';
3
  import remarkMath from 'remark-math';
4
  import rehypeKatex from 'rehype-katex';
 
5
  import 'katex/dist/katex.min.css';
6
 
7
- // Let's go back to the simple approach: render normally and add a simple overlay for chunks
8
- // This avoids the complexity of trying to match text content
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
  function DocumentProcessor() {
11
  const fileInputRef = useRef(null);
@@ -480,10 +507,32 @@ function DocumentProcessor() {
480
 
481
  {/* Content */}
482
  <div className="flex-1 px-6 pt-6 pb-8 overflow-y-auto">
483
- <div className="prose prose-sm max-w-none">
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
484
  <ReactMarkdown
485
  remarkPlugins={[remarkMath]}
486
- rehypePlugins={[rehypeKatex]}
487
  components={{
488
  h1: ({ children }) => <h1 style={{ fontSize: '1.5rem', fontWeight: 'bold', marginBottom: '1rem', color: '#1a202c' }}>{children}</h1>,
489
  h2: ({ children }) => <h2 style={{ fontSize: '1.25rem', fontWeight: 'bold', marginBottom: '0.75rem', marginTop: '1.5rem', color: '#1a202c' }}>{children}</h2>,
@@ -504,10 +553,15 @@ function DocumentProcessor() {
504
  <pre style={{ backgroundColor: '#f3f4f6', padding: '0.75rem', borderRadius: '0.375rem', overflowX: 'auto', margin: '0.75rem 0' }}>
505
  <code style={{ fontSize: '0.75rem', fontFamily: 'monospace' }}>{children}</code>
506
  </pre>,
 
 
 
 
 
507
  img: ({ src, alt }) => <ImageComponent src={src} alt={alt} />
508
  }}
509
  >
510
- {documentData.markdown}
511
  </ReactMarkdown>
512
  </div>
513
  </div>
@@ -601,7 +655,7 @@ function DocumentProcessor() {
601
  <div className="prose prose-sm max-w-none">
602
  <ReactMarkdown
603
  remarkPlugins={[remarkMath]}
604
- rehypePlugins={[rehypeKatex]}
605
  components={{
606
  h1: ({ children }) => <h1 style={{ fontSize: '1.25rem', fontWeight: 'bold', marginBottom: '0.75rem', color: '#1a202c' }}>{children}</h1>,
607
  h2: ({ children }) => <h2 style={{ fontSize: '1.125rem', fontWeight: 'bold', marginBottom: '0.5rem', marginTop: '1rem', color: '#1a202c' }}>{children}</h2>,
@@ -685,7 +739,7 @@ function DocumentProcessor() {
685
  <div className="prose prose-sm max-w-none">
686
  <ReactMarkdown
687
  remarkPlugins={[remarkMath]}
688
- rehypePlugins={[rehypeKatex]}
689
  components={{
690
  p: ({ children }) => <p className="mb-2 text-gray-800 leading-relaxed">{children}</p>,
691
  ul: ({ children }) => <ul className="mb-2 ml-4 list-disc">{children}</ul>,
@@ -715,7 +769,7 @@ function DocumentProcessor() {
715
  <div className="prose prose-sm max-w-none">
716
  <ReactMarkdown
717
  remarkPlugins={[remarkMath]}
718
- rehypePlugins={[rehypeKatex]}
719
  components={{
720
  p: ({ children }) => <p className="mb-2 text-gray-800 leading-relaxed">{children}</p>,
721
  ul: ({ children }) => <ul className="mb-2 ml-4 list-disc">{children}</ul>,
@@ -749,7 +803,7 @@ function DocumentProcessor() {
749
  <div className="prose prose-sm max-w-none">
750
  <ReactMarkdown
751
  remarkPlugins={[remarkMath]}
752
- rehypePlugins={[rehypeKatex]}
753
  components={{
754
  p: ({ children }) => <p className="mb-2 text-gray-800 leading-relaxed">{children}</p>,
755
  ul: ({ children }) => <ul className="mb-2 ml-4 list-disc">{children}</ul>,
 
1
+ import { useState, useRef, useEffect, useCallback } from 'react';
2
  import ReactMarkdown from 'react-markdown';
3
  import remarkMath from 'remark-math';
4
  import rehypeKatex from 'rehype-katex';
5
+ import rehypeRaw from 'rehype-raw';
6
  import 'katex/dist/katex.min.css';
7
 
8
+ // Simple function to highlight current chunk in markdown before rendering
9
+ const highlightChunkInMarkdown = (markdown, chunks, currentChunkIndex) => {
10
+ if (!chunks || !chunks[currentChunkIndex] || !markdown) {
11
+ return markdown;
12
+ }
13
+
14
+ const chunk = chunks[currentChunkIndex];
15
+ const chunkText = markdown.slice(chunk.start_position, chunk.end_position);
16
+
17
+ // Debug logging
18
+ console.log('Chunk debugging:', {
19
+ chunkIndex: currentChunkIndex,
20
+ startPos: chunk.start_position,
21
+ endPos: chunk.end_position,
22
+ chunkTextLength: chunkText.length,
23
+ chunkTextPreview: chunkText.substring(0, 50) + '...',
24
+ beforeText: markdown.slice(Math.max(0, chunk.start_position - 20), chunk.start_position),
25
+ afterText: markdown.slice(chunk.end_position, chunk.end_position + 20)
26
+ });
27
+
28
+ // Use div wrapper that extends into document margins with left border and fade-in animation
29
+ const highlightedChunk = `<div style="background-color: rgba(255, 214, 100, 0.15); border-left: 4px solid rgba(156, 163, 175, 0.5); padding: 0.75rem; margin: 0.5rem -1.5rem; font-size: 0.875rem; line-height: 1.5; color: rgb(55, 65, 81); animation: fadeInHighlight 200ms ease-out;">${chunkText}</div>`;
30
+
31
+ // Replace the original chunk with the highlighted version
32
+ return markdown.slice(0, chunk.start_position) +
33
+ highlightedChunk +
34
+ markdown.slice(chunk.end_position);
35
+ };
36
 
37
  function DocumentProcessor() {
38
  const fileInputRef = useRef(null);
 
507
 
508
  {/* Content */}
509
  <div className="flex-1 px-6 pt-6 pb-8 overflow-y-auto">
510
+ <style>
511
+ {`
512
+ @keyframes fadeInHighlight {
513
+ 0% {
514
+ background-color: rgba(255, 214, 100, 0);
515
+ border-left-color: rgba(156, 163, 175, 0);
516
+ transform: translateX(-10px);
517
+ opacity: 0;
518
+ }
519
+ 100% {
520
+ background-color: rgba(255, 214, 100, 0.15);
521
+ border-left-color: rgba(156, 163, 175, 0.5);
522
+ transform: translateX(0);
523
+ opacity: 1;
524
+ }
525
+ }
526
+ `}
527
+ </style>
528
+ <div className="prose prose-sm max-w-none" style={{
529
+ fontSize: '0.875rem',
530
+ lineHeight: '1.5',
531
+ color: 'rgb(55, 65, 81)'
532
+ }}>
533
  <ReactMarkdown
534
  remarkPlugins={[remarkMath]}
535
+ rehypePlugins={[rehypeRaw, rehypeKatex]}
536
  components={{
537
  h1: ({ children }) => <h1 style={{ fontSize: '1.5rem', fontWeight: 'bold', marginBottom: '1rem', color: '#1a202c' }}>{children}</h1>,
538
  h2: ({ children }) => <h2 style={{ fontSize: '1.25rem', fontWeight: 'bold', marginBottom: '0.75rem', marginTop: '1.5rem', color: '#1a202c' }}>{children}</h2>,
 
553
  <pre style={{ backgroundColor: '#f3f4f6', padding: '0.75rem', borderRadius: '0.375rem', overflowX: 'auto', margin: '0.75rem 0' }}>
554
  <code style={{ fontSize: '0.75rem', fontFamily: 'monospace' }}>{children}</code>
555
  </pre>,
556
+ div: ({ children, style }) => (
557
+ <div style={style}>
558
+ {children}
559
+ </div>
560
+ ),
561
  img: ({ src, alt }) => <ImageComponent src={src} alt={alt} />
562
  }}
563
  >
564
+ {highlightChunkInMarkdown(documentData.markdown, documentData.chunks, currentChunkIndex)}
565
  </ReactMarkdown>
566
  </div>
567
  </div>
 
655
  <div className="prose prose-sm max-w-none">
656
  <ReactMarkdown
657
  remarkPlugins={[remarkMath]}
658
+ rehypePlugins={[rehypeRaw, rehypeKatex]}
659
  components={{
660
  h1: ({ children }) => <h1 style={{ fontSize: '1.25rem', fontWeight: 'bold', marginBottom: '0.75rem', color: '#1a202c' }}>{children}</h1>,
661
  h2: ({ children }) => <h2 style={{ fontSize: '1.125rem', fontWeight: 'bold', marginBottom: '0.5rem', marginTop: '1rem', color: '#1a202c' }}>{children}</h2>,
 
739
  <div className="prose prose-sm max-w-none">
740
  <ReactMarkdown
741
  remarkPlugins={[remarkMath]}
742
+ rehypePlugins={[rehypeRaw, rehypeKatex]}
743
  components={{
744
  p: ({ children }) => <p className="mb-2 text-gray-800 leading-relaxed">{children}</p>,
745
  ul: ({ children }) => <ul className="mb-2 ml-4 list-disc">{children}</ul>,
 
769
  <div className="prose prose-sm max-w-none">
770
  <ReactMarkdown
771
  remarkPlugins={[remarkMath]}
772
+ rehypePlugins={[rehypeRaw, rehypeKatex]}
773
  components={{
774
  p: ({ children }) => <p className="mb-2 text-gray-800 leading-relaxed">{children}</p>,
775
  ul: ({ children }) => <ul className="mb-2 ml-4 list-disc">{children}</ul>,
 
803
  <div className="prose prose-sm max-w-none">
804
  <ReactMarkdown
805
  remarkPlugins={[remarkMath]}
806
+ rehypePlugins={[rehypeRaw, rehypeKatex]}
807
  components={{
808
  p: ({ children }) => <p className="mb-2 text-gray-800 leading-relaxed">{children}</p>,
809
  ul: ({ children }) => <ul className="mb-2 ml-4 list-disc">{children}</ul>,