Spaces:
Sleeping
Sleeping
Commit
·
98d2bd7
1
Parent(s):
1e6902f
Added custom chat component
Browse files
- backend/app.py +13 -1108
- backend/requirements.txt +1 -8
- frontend/components.json +21 -0
- frontend/jsconfig.json +9 -0
- frontend/package-lock.json +214 -7
- frontend/package.json +7 -0
- frontend/src/App.jsx +2 -2
- frontend/src/components/ChunkPanel.jsx +70 -116
- frontend/src/components/DocumentProcessor.jsx +3 -19
- frontend/src/components/DocumentProcessor.jsx.backup +0 -889
- frontend/src/components/ImageComponent.jsx +0 -115
- frontend/src/components/LoadingAnimation.jsx +3 -17
- frontend/src/components/SimpleChat.jsx +116 -0
- frontend/src/components/UploadPage.jsx +0 -277
- frontend/src/hooks/useChat.js +0 -109
- frontend/src/hooks/useChunkNavigation.js +2 -0
- frontend/src/hooks/useDocumentProcessor.js +52 -88
- frontend/src/lib/utils.js +6 -0
- frontend/src/utils/markdownComponents.jsx +3 -79
- frontend/src/utils/markdownUtils.js +0 -33
- frontend/vite.config.js +1 -5
- test_fuzzy_find.py +0 -194
backend/app.py
CHANGED
|
@@ -2,20 +2,13 @@ from fastapi import FastAPI, File, UploadFile, HTTPException
|
|
| 2 |
from fastapi.middleware.cors import CORSMiddleware
|
| 3 |
from fastapi.staticfiles import StaticFiles
|
| 4 |
from fastapi.responses import FileResponse
|
| 5 |
-
from mistralai import Mistral
|
| 6 |
import os
|
| 7 |
import tempfile
|
| 8 |
-
import json
|
| 9 |
-
import re
|
| 10 |
-
import string
|
| 11 |
from dotenv import load_dotenv
|
| 12 |
-
from difflib import SequenceMatcher
|
| 13 |
-
from pydantic import BaseModel, Field
|
| 14 |
from typing import Optional, List
|
| 15 |
-
from langchain.chat_models import init_chat_model
|
| 16 |
import anthropic
|
| 17 |
-
|
| 18 |
-
from google import genai
|
| 19 |
# Load environment variables
|
| 20 |
load_dotenv()
|
| 21 |
|
|
@@ -107,1120 +100,32 @@ Keep responses concise and educational. When relevant, use LaTeX math notation l
|
|
| 107 |
|
| 108 |
@app.post("/upload_pdf")
|
| 109 |
async def upload_pdf(file: UploadFile = File(...)):
|
| 110 |
-
"""
|
| 111 |
-
print(f"📄
|
| 112 |
-
|
| 113 |
-
# Get Mistral API key
|
| 114 |
-
api_key = os.environ.get("MISTRAL_API_KEY")
|
| 115 |
-
if not api_key:
|
| 116 |
-
print("❌ No Mistral API key found")
|
| 117 |
-
raise HTTPException(status_code=500, detail="MISTRAL_API_KEY not set in environment")
|
| 118 |
|
| 119 |
try:
|
| 120 |
-
# Initialize Mistral client
|
| 121 |
-
client = Mistral(api_key=api_key)
|
| 122 |
-
print("🔑 Mistral client initialized")
|
| 123 |
-
|
| 124 |
# Read PDF bytes
|
| 125 |
file_bytes = await file.read()
|
| 126 |
print(f"📊 File size: {len(file_bytes)} bytes")
|
| 127 |
|
| 128 |
-
# Create temporary file
|
| 129 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
|
| 130 |
temp_file.write(file_bytes)
|
| 131 |
temp_file_path = temp_file.name
|
| 132 |
|
| 133 |
-
|
| 134 |
-
print("🚀 Uploading to Mistral...")
|
| 135 |
-
|
| 136 |
-
# Upload PDF to Mistral for OCR processing
|
| 137 |
-
uploaded_pdf = client.files.upload(
|
| 138 |
-
file={
|
| 139 |
-
"file_name": file.filename or "document.pdf",
|
| 140 |
-
"content": open(temp_file_path, "rb"),
|
| 141 |
-
},
|
| 142 |
-
purpose="ocr"
|
| 143 |
-
)
|
| 144 |
-
|
| 145 |
-
print(f"✅ Upload successful! File ID: {uploaded_pdf.id}")
|
| 146 |
-
|
| 147 |
-
return {
|
| 148 |
-
"message": "PDF uploaded to Mistral successfully!",
|
| 149 |
-
"file_id": uploaded_pdf.id,
|
| 150 |
-
"filename": file.filename,
|
| 151 |
-
"status": "uploaded",
|
| 152 |
-
"mistral_response": str(uploaded_pdf)
|
| 153 |
-
}
|
| 154 |
-
|
| 155 |
-
finally:
|
| 156 |
-
# Clean up temporary file
|
| 157 |
-
os.unlink(temp_file_path)
|
| 158 |
-
print("🗑️ Temporary file cleaned up")
|
| 159 |
-
|
| 160 |
-
except Exception as e:
|
| 161 |
-
print(f"❌ Error with Mistral API: {e}")
|
| 162 |
-
raise HTTPException(status_code=500, detail=f"Mistral API error: {str(e)}")
|
| 163 |
-
|
| 164 |
-
@app.get("/process_ocr/{file_id}")
|
| 165 |
-
async def process_ocr_content(file_id: str):
|
| 166 |
-
"""Process OCR content using proper Mistral OCR API"""
|
| 167 |
-
print(f"🔍 Processing OCR for file ID: {file_id}")
|
| 168 |
-
|
| 169 |
-
# Get Mistral API key
|
| 170 |
-
api_key = os.environ.get("MISTRAL_API_KEY")
|
| 171 |
-
if not api_key:
|
| 172 |
-
raise HTTPException(status_code=500, detail="MISTRAL_API_KEY not set")
|
| 173 |
-
|
| 174 |
-
try:
|
| 175 |
-
# Initialize Mistral client
|
| 176 |
-
client = Mistral(api_key=api_key)
|
| 177 |
-
|
| 178 |
-
# Get signed URL for the file
|
| 179 |
-
print("🔗 Getting signed URL...")
|
| 180 |
-
signed_url = client.files.get_signed_url(file_id=file_id, expiry=1)
|
| 181 |
-
print(f"✅ Signed URL obtained")
|
| 182 |
-
|
| 183 |
-
# Process OCR using the proper API
|
| 184 |
-
print("🚀 Processing OCR...")
|
| 185 |
-
ocr_response = client.ocr.process(
|
| 186 |
-
model="mistral-ocr-latest",
|
| 187 |
-
document={
|
| 188 |
-
"type": "document_url",
|
| 189 |
-
"document_url": signed_url.url,
|
| 190 |
-
},
|
| 191 |
-
include_image_base64=True # Include images for full processing
|
| 192 |
-
)
|
| 193 |
-
|
| 194 |
-
print(f"✅ OCR processing complete! Found {len(ocr_response.pages)} pages")
|
| 195 |
-
|
| 196 |
-
# Debug: Print raw OCR response structure
|
| 197 |
-
print("\n" + "="*80)
|
| 198 |
-
print("🔍 RAW MISTRAL OCR RESPONSE DEBUG:")
|
| 199 |
-
print("="*80)
|
| 200 |
-
|
| 201 |
-
for page_idx, page in enumerate(ocr_response.pages):
|
| 202 |
-
print(f"\n📄 PAGE {page_idx + 1} RAW MARKDOWN:")
|
| 203 |
-
print("-" * 50)
|
| 204 |
-
print(repr(page.markdown)) # Using repr() to show escape characters
|
| 205 |
-
print("-" * 50)
|
| 206 |
-
print("RENDERED:")
|
| 207 |
-
print(page.markdown[:500] + "..." if len(page.markdown) > 500 else page.markdown)
|
| 208 |
-
print(f"TOTAL LENGTH: {len(page.markdown)} characters")
|
| 209 |
-
|
| 210 |
-
print("="*80)
|
| 211 |
-
print("END RAW OCR DEBUG")
|
| 212 |
-
print("="*80 + "\n")
|
| 213 |
-
|
| 214 |
-
# Process each page and extract structured data (without per-page chunking)
|
| 215 |
-
processed_pages = []
|
| 216 |
-
all_page_markdown = []
|
| 217 |
-
|
| 218 |
-
for page_idx, page in enumerate(ocr_response.pages):
|
| 219 |
-
print(f"📄 Page {page_idx + 1}: {len(page.markdown)} chars, {len(page.images)} images")
|
| 220 |
-
|
| 221 |
-
page_data = {
|
| 222 |
-
"index": page.index,
|
| 223 |
-
"markdown": page.markdown,
|
| 224 |
-
"images": [],
|
| 225 |
-
"dimensions": {
|
| 226 |
-
"dpi": page.dimensions.dpi,
|
| 227 |
-
"height": page.dimensions.height,
|
| 228 |
-
"width": page.dimensions.width
|
| 229 |
-
}
|
| 230 |
-
}
|
| 231 |
-
|
| 232 |
-
# Process images with coordinates
|
| 233 |
-
for img in page.images:
|
| 234 |
-
image_data = {
|
| 235 |
-
"id": img.id,
|
| 236 |
-
"coordinates": {
|
| 237 |
-
"top_left_x": img.top_left_x,
|
| 238 |
-
"top_left_y": img.top_left_y,
|
| 239 |
-
"bottom_right_x": img.bottom_right_x,
|
| 240 |
-
"bottom_right_y": img.bottom_right_y
|
| 241 |
-
},
|
| 242 |
-
"has_base64": bool(img.image_base64) # Don't include actual base64 in response
|
| 243 |
-
}
|
| 244 |
-
page_data["images"].append(image_data)
|
| 245 |
-
|
| 246 |
-
processed_pages.append(page_data)
|
| 247 |
-
all_page_markdown.append(page.markdown)
|
| 248 |
-
|
| 249 |
-
# Combine all markdown into single document
|
| 250 |
-
combined_markdown = '\n\n---\n\n'.join(all_page_markdown)
|
| 251 |
-
print(f"📋 Combined document: {len(combined_markdown)} chars total")
|
| 252 |
-
|
| 253 |
-
# Auto-chunk the entire document once - try Gemini first, then fallback
|
| 254 |
-
document_chunks = []
|
| 255 |
-
original_markdown = combined_markdown
|
| 256 |
-
try:
|
| 257 |
-
print(f"🧠 Auto-chunking entire document with Gemini...")
|
| 258 |
-
document_chunks, original_markdown = await gemini_chunk_document(combined_markdown)
|
| 259 |
-
|
| 260 |
-
# If Gemini failed, try the old Fireworks method
|
| 261 |
-
if not document_chunks:
|
| 262 |
-
print(f"🔄 Gemini failed, falling back to Fireworks...")
|
| 263 |
-
document_chunks, original_markdown = await auto_chunk_document(combined_markdown, client)
|
| 264 |
-
|
| 265 |
-
print(f"📊 Document chunks found: {len(document_chunks)}")
|
| 266 |
-
for i, chunk in enumerate(document_chunks):
|
| 267 |
-
topic = chunk.get('topic', 'Unknown')
|
| 268 |
-
preview = chunk.get('text', chunk.get('start_phrase', ''))[:50] + "..." if chunk.get('text', chunk.get('start_phrase', '')) else 'No content'
|
| 269 |
-
print(f" {i+1}. {topic}: {preview}")
|
| 270 |
-
|
| 271 |
-
except Exception as chunk_error:
|
| 272 |
-
print(f"⚠️ Document chunking failed: {chunk_error}")
|
| 273 |
-
document_chunks = []
|
| 274 |
-
original_markdown = combined_markdown
|
| 275 |
-
|
| 276 |
-
print(f"📝 Total processed pages: {len(processed_pages)}")
|
| 277 |
-
|
| 278 |
-
return {
|
| 279 |
-
"file_id": file_id,
|
| 280 |
-
"pages": processed_pages,
|
| 281 |
-
"total_pages": len(processed_pages),
|
| 282 |
-
"combined_markdown": original_markdown, # Send original version for highlighting
|
| 283 |
-
"chunks": document_chunks,
|
| 284 |
-
"status": "processed"
|
| 285 |
-
}
|
| 286 |
-
|
| 287 |
-
except Exception as e:
|
| 288 |
-
print(f"❌ Error processing OCR: {e}")
|
| 289 |
-
raise HTTPException(status_code=500, detail=f"Error processing OCR: {str(e)}")
|
| 290 |
-
|
| 291 |
-
@app.get("/get_image/{file_id}/{image_id}")
|
| 292 |
-
async def get_image_base64(file_id: str, image_id: str):
|
| 293 |
-
"""Get base64 image data for a specific image"""
|
| 294 |
-
print(f"🖼️ Getting image {image_id} from file {file_id}")
|
| 295 |
-
|
| 296 |
-
# Get Mistral API key
|
| 297 |
-
api_key = os.environ.get("MISTRAL_API_KEY")
|
| 298 |
-
if not api_key:
|
| 299 |
-
raise HTTPException(status_code=500, detail="MISTRAL_API_KEY not set")
|
| 300 |
-
|
| 301 |
-
try:
|
| 302 |
-
# Initialize Mistral client
|
| 303 |
-
client = Mistral(api_key=api_key)
|
| 304 |
-
|
| 305 |
-
# Get signed URL and process OCR again (we could cache this)
|
| 306 |
-
signed_url = client.files.get_signed_url(file_id=file_id, expiry=1)
|
| 307 |
-
|
| 308 |
-
ocr_response = client.ocr.process(
|
| 309 |
-
model="mistral-ocr-latest",
|
| 310 |
-
document={
|
| 311 |
-
"type": "document_url",
|
| 312 |
-
"document_url": signed_url.url,
|
| 313 |
-
},
|
| 314 |
-
include_image_base64=True
|
| 315 |
-
)
|
| 316 |
-
|
| 317 |
-
# Find the requested image
|
| 318 |
-
for page in ocr_response.pages:
|
| 319 |
-
for img in page.images:
|
| 320 |
-
if img.id == image_id:
|
| 321 |
-
return {
|
| 322 |
-
"image_id": image_id,
|
| 323 |
-
"image_base64": img.image_base64,
|
| 324 |
-
"coordinates": {
|
| 325 |
-
"top_left_x": img.top_left_x,
|
| 326 |
-
"top_left_y": img.top_left_y,
|
| 327 |
-
"bottom_right_x": img.bottom_right_x,
|
| 328 |
-
"bottom_right_y": img.bottom_right_y
|
| 329 |
-
}
|
| 330 |
-
}
|
| 331 |
-
|
| 332 |
-
raise HTTPException(status_code=404, detail=f"Image {image_id} not found")
|
| 333 |
-
|
| 334 |
-
except Exception as e:
|
| 335 |
-
print(f"❌ Error getting image: {e}")
|
| 336 |
-
raise HTTPException(status_code=500, detail=f"Error getting image: {str(e)}")
|
| 337 |
-
|
| 338 |
-
class ChunkSchema(BaseModel):
|
| 339 |
-
"""Schema for document chunks suitable for creating interactive lessons."""
|
| 340 |
-
topic: str = Field(description="Brief descriptive name (2-6 words) for the educational content")
|
| 341 |
-
text: str = Field(description="Complete chunk text with exact markdown/LaTeX formatting preserved, containing 2-3 related educational concepts")
|
| 342 |
-
|
| 343 |
-
class ChunkList(BaseModel):
|
| 344 |
-
"""Container for a list of document chunks."""
|
| 345 |
-
chunks: List[ChunkSchema] = Field(description="List of identified chunks for interactive lessons")
|
| 346 |
-
|
| 347 |
-
def find_paragraph_end(text, start_pos):
|
| 348 |
-
"""Find the end of a paragraph starting from start_pos"""
|
| 349 |
-
end_pos = start_pos
|
| 350 |
-
while end_pos < len(text) and text[end_pos] not in ['\n', '\r']:
|
| 351 |
-
end_pos += 1
|
| 352 |
-
|
| 353 |
-
return end_pos
|
| 354 |
-
|
| 355 |
-
def find_paragraph_end(text, start_pos):
|
| 356 |
-
"""Find the end of current paragraph (looks for \\n\\n or document end)"""
|
| 357 |
-
pos = start_pos
|
| 358 |
-
while pos < len(text):
|
| 359 |
-
if pos < len(text) - 1 and text[pos:pos+2] == '\n\n':
|
| 360 |
-
return pos # End at paragraph break
|
| 361 |
-
elif text[pos] in '.!?':
|
| 362 |
-
# Found sentence end, check if paragraph continues
|
| 363 |
-
next_pos = pos + 1
|
| 364 |
-
while next_pos < len(text) and text[next_pos] in ' \t':
|
| 365 |
-
next_pos += 1
|
| 366 |
-
if next_pos < len(text) - 1 and text[next_pos:next_pos+2] == '\n\n':
|
| 367 |
-
return next_pos # Paragraph ends after this sentence
|
| 368 |
-
pos = next_pos
|
| 369 |
-
else:
|
| 370 |
-
pos += 1
|
| 371 |
-
return min(pos, len(text))
|
| 372 |
-
|
| 373 |
-
def fuzzy_find(text, pattern, start_pos=0):
|
| 374 |
-
"""Find the best fuzzy match for pattern in text starting from start_pos"""
|
| 375 |
-
best_ratio = 0
|
| 376 |
-
best_pos = -1
|
| 377 |
-
|
| 378 |
-
# Search in sliding windows
|
| 379 |
-
pattern_len = len(pattern)
|
| 380 |
-
for i in range(start_pos, len(text) - pattern_len + 1):
|
| 381 |
-
window = text[i:i + pattern_len]
|
| 382 |
-
ratio = SequenceMatcher(None, pattern.lower(), window.lower()).ratio()
|
| 383 |
-
|
| 384 |
-
if ratio > best_ratio and ratio > 0.8: # Much stricter: 80% similarity
|
| 385 |
-
best_ratio = ratio
|
| 386 |
-
best_pos = i
|
| 387 |
-
|
| 388 |
-
return best_pos if best_pos != -1 else None
|
| 389 |
-
|
| 390 |
-
def clean_academic_content(text):
|
| 391 |
-
"""Remove common academic paper noise that breaks natural chunking"""
|
| 392 |
-
|
| 393 |
-
# Patterns to remove/clean
|
| 394 |
-
patterns_to_remove = [
|
| 395 |
-
# Author contribution footnotes
|
| 396 |
-
r'\[\^\d+\]:\s*[∗\*]+\s*Equal contribution[^.]*\.',
|
| 397 |
-
r'\[\^\d+\]:\s*[†\*]+\s*Correspondence to[^.]*\.',
|
| 398 |
-
r'\[\^\d+\]:\s*[†\*]+\s*Corresponding author[^.]*\.',
|
| 399 |
-
|
| 400 |
-
# Copyright notices
|
| 401 |
-
r'Copyright \(c\) \d{4}[^.]*\.',
|
| 402 |
-
r'All rights reserved\.',
|
| 403 |
-
|
| 404 |
-
# Common academic noise
|
| 405 |
-
r'\[\^\d+\]:\s*Code available at[^.]*\.',
|
| 406 |
-
r'\[\^\d+\]:\s*Data available at[^.]*\.',
|
| 407 |
-
r'\[\^\d+\]:\s*This work was[^.]*\.',
|
| 408 |
-
|
| 409 |
-
# Funding acknowledgments (often break paragraphs)
|
| 410 |
-
r'This research was supported by[^.]*\.',
|
| 411 |
-
r'Funded by[^.]*\.',
|
| 412 |
-
|
| 413 |
-
# Page numbers and headers that shouldn't end paragraphs
|
| 414 |
-
r'^\d+$', # Standalone page numbers
|
| 415 |
-
r'^Page \d+',
|
| 416 |
-
|
| 417 |
-
# DOI and URL patterns that break paragraphs
|
| 418 |
-
r'DOI:\s*\S+',
|
| 419 |
-
r'arXiv:\d{4}\.\d{4,5}',
|
| 420 |
-
]
|
| 421 |
-
|
| 422 |
-
cleaned_text = text
|
| 423 |
-
for pattern in patterns_to_remove:
|
| 424 |
-
cleaned_text = re.sub(pattern, '', cleaned_text, flags=re.MULTILINE | re.IGNORECASE)
|
| 425 |
-
|
| 426 |
-
# Clean up multiple newlines created by removals
|
| 427 |
-
cleaned_text = re.sub(r'\n\n\n+', '\n\n', cleaned_text)
|
| 428 |
-
|
| 429 |
-
return cleaned_text.strip()
|
| 430 |
-
|
| 431 |
-
def validate_paragraph_chunk(chunk_text):
|
| 432 |
-
"""Check if a chunk looks like valid content (not metadata/noise)"""
|
| 433 |
-
# Skip very short chunks
|
| 434 |
-
if len(chunk_text.strip()) < 50:
|
| 435 |
-
return False
|
| 436 |
-
|
| 437 |
-
# Skip chunks that are mostly footnote references
|
| 438 |
-
footnote_refs = len(re.findall(r'\[\^\d+\]', chunk_text))
|
| 439 |
-
if footnote_refs > len(chunk_text.split()) / 10: # More than 10% footnote refs
|
| 440 |
-
return False
|
| 441 |
-
|
| 442 |
-
# Skip chunks that are mostly citations
|
| 443 |
-
citations = len(re.findall(r'\[\d+\]', chunk_text))
|
| 444 |
-
if citations > len(chunk_text.split()) / 8: # More than 12.5% citations
|
| 445 |
-
return False
|
| 446 |
-
|
| 447 |
-
# Skip chunks that are mostly symbols/special chars
|
| 448 |
-
normal_chars = sum(1 for c in chunk_text if c.isalnum() or c in string.whitespace)
|
| 449 |
-
if normal_chars / len(chunk_text) < 0.7: # Less than 70% normal content
|
| 450 |
-
return False
|
| 451 |
-
|
| 452 |
-
return True
|
| 453 |
-
|
| 454 |
-
def programmatic_chunk_document(document_markdown):
|
| 455 |
-
"""Chunk document by natural paragraph boundaries - much more reliable than LLM"""
|
| 456 |
-
if not document_markdown or len(document_markdown.strip()) < 100:
|
| 457 |
-
return []
|
| 458 |
-
|
| 459 |
-
# Use original document without any cleaning to preserve integrity
|
| 460 |
-
original_markdown = document_markdown
|
| 461 |
-
print(f"📄 Using original document: {len(document_markdown)} chars")
|
| 462 |
-
|
| 463 |
-
chunks = []
|
| 464 |
-
start_pos = 0
|
| 465 |
-
chunk_count = 0
|
| 466 |
-
|
| 467 |
-
print(f"🧠 Using programmatic paragraph-based chunking...")
|
| 468 |
-
|
| 469 |
-
# Find all proper paragraph endings: [.!?] followed by \n\n
|
| 470 |
-
paragraph_ends = []
|
| 471 |
-
|
| 472 |
-
# Pattern: sentence punctuation followed by \n\n
|
| 473 |
-
pattern = r'([.!?])\n\n'
|
| 474 |
-
matches = re.finditer(pattern, original_markdown)
|
| 475 |
-
|
| 476 |
-
for match in matches:
|
| 477 |
-
end_pos = match.end() - 3 # Position right after punctuation, before \n\n
|
| 478 |
-
paragraph_ends.append(end_pos)
|
| 479 |
-
|
| 480 |
-
print(f"📊 Found {len(paragraph_ends)} natural paragraph endings")
|
| 481 |
-
|
| 482 |
-
# Create chunks from paragraph boundaries using original document
|
| 483 |
-
for i, end_pos in enumerate(paragraph_ends):
|
| 484 |
-
# Extract from original markdown
|
| 485 |
-
chunk_text_clean = original_markdown[start_pos:end_pos + 1]
|
| 486 |
-
|
| 487 |
-
# Validate chunk quality
|
| 488 |
-
if not validate_paragraph_chunk(chunk_text_clean):
|
| 489 |
-
print(f" ❌ Skipping low-quality chunk: {chunk_text_clean[:50]}...")
|
| 490 |
-
start_pos = end_pos + 3 # Skip past .\n\n
|
| 491 |
-
continue
|
| 492 |
-
|
| 493 |
-
chunk_count += 1
|
| 494 |
-
|
| 495 |
-
# Map positions back to original document for highlighting
|
| 496 |
-
# For now, use cleaned positions (we could implement position mapping if needed)
|
| 497 |
-
chunk_text = chunk_text_clean
|
| 498 |
-
|
| 499 |
-
# Create a simple topic from first few words
|
| 500 |
-
first_line = chunk_text.split('\n')[0].strip()
|
| 501 |
-
topic = first_line[:50] + "..." if len(first_line) > 50 else first_line
|
| 502 |
-
|
| 503 |
-
chunks.append({
|
| 504 |
-
"topic": topic,
|
| 505 |
-
"start_position": start_pos,
|
| 506 |
-
"end_position": end_pos + 1,
|
| 507 |
-
"start_phrase": chunk_text[:20] + "...", # First 20 chars
|
| 508 |
-
"end_phrase": "..." + chunk_text[-20:], # Last 20 chars
|
| 509 |
-
"found_start": True,
|
| 510 |
-
"found_end": True
|
| 511 |
-
})
|
| 512 |
-
|
| 513 |
-
print(f" ✅ Chunk {chunk_count}: {start_pos}-{end_pos + 1} (length: {end_pos + 1 - start_pos})")
|
| 514 |
-
print(f" Topic: {topic}")
|
| 515 |
-
print(f" Preview: {chunk_text[:80]}...")
|
| 516 |
-
|
| 517 |
-
# Next chunk starts after \n\n
|
| 518 |
-
start_pos = end_pos + 3 # Skip past .\n\n
|
| 519 |
-
|
| 520 |
-
# Handle any remaining text (document might not end with proper paragraph)
|
| 521 |
-
if start_pos < len(original_markdown):
|
| 522 |
-
remaining_text = original_markdown[start_pos:].strip()
|
| 523 |
-
if remaining_text and validate_paragraph_chunk(remaining_text):
|
| 524 |
-
chunk_count += 1
|
| 525 |
-
first_line = remaining_text.split('\n')[0].strip()
|
| 526 |
-
topic = first_line[:50] + "..." if len(first_line) > 50 else first_line
|
| 527 |
-
|
| 528 |
-
chunks.append({
|
| 529 |
-
"topic": topic,
|
| 530 |
-
"start_position": start_pos,
|
| 531 |
-
"end_position": len(original_markdown),
|
| 532 |
-
"start_phrase": remaining_text[:20] + "...",
|
| 533 |
-
"end_phrase": "..." + remaining_text[-20:],
|
| 534 |
-
"found_start": True,
|
| 535 |
-
"found_end": True
|
| 536 |
-
})
|
| 537 |
-
|
| 538 |
-
print(f" ✅ Final chunk {chunk_count}: {start_pos}-{len(original_markdown)} (remaining text)")
|
| 539 |
-
else:
|
| 540 |
-
print(f" ❌ Skipping low-quality remaining text")
|
| 541 |
-
|
| 542 |
-
print(f"📊 Created {len(chunks)} high-quality paragraph-based chunks")
|
| 543 |
-
|
| 544 |
-
# Note: We're returning chunks based on original document positions
|
| 545 |
-
# The frontend will use the original document for highlighting
|
| 546 |
-
return chunks, document_markdown
|
| 547 |
-
|
| 548 |
-
def split_document_into_batches(document_markdown, max_chars=8000):
|
| 549 |
-
"""Split document into manageable batches for LLM processing"""
|
| 550 |
-
if len(document_markdown) <= max_chars:
|
| 551 |
-
return [document_markdown]
|
| 552 |
-
|
| 553 |
-
batches = []
|
| 554 |
-
current_pos = 0
|
| 555 |
-
|
| 556 |
-
while current_pos < len(document_markdown):
|
| 557 |
-
# Try to find a good breaking point (paragraph boundary)
|
| 558 |
-
end_pos = min(current_pos + max_chars, len(document_markdown))
|
| 559 |
-
|
| 560 |
-
# If we're not at the end, try to break at a paragraph boundary
|
| 561 |
-
if end_pos < len(document_markdown):
|
| 562 |
-
# Look for \n\n within the last 1000 characters of this batch
|
| 563 |
-
search_start = max(end_pos - 1000, current_pos)
|
| 564 |
-
last_paragraph = document_markdown.rfind('\n\n', search_start, end_pos)
|
| 565 |
-
|
| 566 |
-
if last_paragraph != -1 and last_paragraph > current_pos:
|
| 567 |
-
end_pos = last_paragraph + 2 # Include the \n\n
|
| 568 |
-
|
| 569 |
-
batch = document_markdown[current_pos:end_pos]
|
| 570 |
-
batches.append(batch)
|
| 571 |
-
current_pos = end_pos
|
| 572 |
-
|
| 573 |
-
print(f"📄 Created batch {len(batches)}: {len(batch)} chars (pos {current_pos-len(batch)}-{current_pos})")
|
| 574 |
-
|
| 575 |
-
return batches
|
| 576 |
-
|
| 577 |
-
async def gemini_chunk_document(document_markdown):
|
| 578 |
-
"""Auto-chunk a document using Google Gemini 2.5 Pro with reliable structured output"""
|
| 579 |
-
|
| 580 |
-
# Get Gemini API key
|
| 581 |
-
gemini_api_key = os.environ.get("GEMINI_API_KEY")
|
| 582 |
-
if not gemini_api_key:
|
| 583 |
-
print("⚠️ No Gemini API key found")
|
| 584 |
-
return None, document_markdown
|
| 585 |
-
|
| 586 |
-
print(f"📄 Document length: {len(document_markdown)} characters")
|
| 587 |
-
|
| 588 |
-
try:
|
| 589 |
-
# Initialize Gemini client
|
| 590 |
-
client = genai.Client(api_key=gemini_api_key)
|
| 591 |
-
|
| 592 |
-
# Split document into batches if it's too large (Gemini has token limits)
|
| 593 |
-
batches = split_document_into_batches(document_markdown, max_chars=12000) # Gemini can handle larger batches
|
| 594 |
-
print(f"📄 Split document into {len(batches)} batches for Gemini")
|
| 595 |
-
|
| 596 |
-
all_chunks = []
|
| 597 |
-
|
| 598 |
-
# Process each batch
|
| 599 |
-
for batch_idx, batch in enumerate(batches):
|
| 600 |
-
print(f"\n🔄 Processing batch {batch_idx + 1}/{len(batches)} ({len(batch)} chars) with Gemini")
|
| 601 |
-
|
| 602 |
-
try:
|
| 603 |
-
# Create the prompt for Gemini
|
| 604 |
-
prompt = f"""You are an educational content analyzer. Analyze this document section and break it into logical learning chunks.
|
| 605 |
-
|
| 606 |
-
Each chunk should:
|
| 607 |
-
- Contain 2-3 related educational concepts that naturally belong together
|
| 608 |
-
- Be 150-500 words (optimal for learning)
|
| 609 |
-
- Have clear educational value
|
| 610 |
-
- Preserve all markdown/LaTeX formatting exactly
|
| 611 |
-
- Skip: abstracts, acknowledgments, references, author info, page numbers
|
| 612 |
-
|
| 613 |
-
Return your response as a valid JSON object with this exact structure:
|
| 614 |
-
{{
|
| 615 |
-
"chunks": [
|
| 616 |
-
{{
|
| 617 |
-
"topic": "Brief descriptive name (2-6 words)",
|
| 618 |
-
"text": "Complete chunk text with exact formatting preserved"
|
| 619 |
-
}}
|
| 620 |
-
]
|
| 621 |
-
}}
|
| 622 |
-
|
| 623 |
-
Document section to analyze:
|
| 624 |
-
{batch}
|
| 625 |
-
|
| 626 |
-
Important: Return ONLY the JSON object, no other text."""
|
| 627 |
-
|
| 628 |
-
# Call Gemini 2.5 Pro (disable thinking for faster/cheaper responses)
|
| 629 |
-
response = client.models.generate_content(
|
| 630 |
-
model="gemini-2.5-pro",
|
| 631 |
-
contents=prompt,
|
| 632 |
-
config=genai.types.GenerateContentConfig(
|
| 633 |
-
thinking_config=genai.types.ThinkingConfig(thinking_budget=-1)
|
| 634 |
-
)
|
| 635 |
-
)
|
| 636 |
-
|
| 637 |
-
# Extract and parse response
|
| 638 |
-
response_text = response.text.strip()
|
| 639 |
-
print(f"📋 Gemini response preview: {response_text}...")
|
| 640 |
-
|
| 641 |
-
# Clean up the response (remove code blocks if present)
|
| 642 |
-
clean_response = response_text
|
| 643 |
-
if clean_response.startswith('```json'):
|
| 644 |
-
clean_response = clean_response[7:]
|
| 645 |
-
if clean_response.endswith('```'):
|
| 646 |
-
clean_response = clean_response[:-3]
|
| 647 |
-
clean_response = clean_response.strip()
|
| 648 |
-
|
| 649 |
-
# Parse JSON
|
| 650 |
-
try:
|
| 651 |
-
json_data = json.loads(clean_response)
|
| 652 |
-
|
| 653 |
-
# Validate structure
|
| 654 |
-
if not isinstance(json_data, dict) or 'chunks' not in json_data:
|
| 655 |
-
print(f"❌ Invalid response structure from Gemini batch {batch_idx + 1}")
|
| 656 |
-
continue
|
| 657 |
-
|
| 658 |
-
chunks = json_data['chunks']
|
| 659 |
-
if not isinstance(chunks, list):
|
| 660 |
-
print(f"❌ 'chunks' is not a list in batch {batch_idx + 1}")
|
| 661 |
-
continue
|
| 662 |
-
|
| 663 |
-
# Process chunks
|
| 664 |
-
batch_chunks = []
|
| 665 |
-
for i, chunk in enumerate(chunks):
|
| 666 |
-
if not isinstance(chunk, dict) or 'topic' not in chunk or 'text' not in chunk:
|
| 667 |
-
print(f"❌ Invalid chunk structure in batch {batch_idx + 1}, chunk {i}")
|
| 668 |
-
continue
|
| 669 |
-
|
| 670 |
-
# Clean up text formatting
|
| 671 |
-
chunk_text = chunk['text']
|
| 672 |
-
# Replace literal \n with actual newlines
|
| 673 |
-
chunk_text = chunk_text.replace('\\n', '\n')
|
| 674 |
-
|
| 675 |
-
batch_chunks.append({
|
| 676 |
-
"topic": chunk['topic'],
|
| 677 |
-
"text": chunk_text,
|
| 678 |
-
"chunk_index": len(all_chunks) + len(batch_chunks)
|
| 679 |
-
})
|
| 680 |
-
|
| 681 |
-
print(f"✅ Processed chunk: {chunk['topic']}")
|
| 682 |
-
|
| 683 |
-
all_chunks.extend(batch_chunks)
|
| 684 |
-
print(f"📊 Batch {batch_idx + 1} added {len(batch_chunks)} chunks (total: {len(all_chunks)})")
|
| 685 |
-
|
| 686 |
-
except json.JSONDecodeError as e:
|
| 687 |
-
print(f"❌ JSON parsing failed for batch {batch_idx + 1}: {e}")
|
| 688 |
-
print(f"❌ Response was: {response_text}")
|
| 689 |
-
continue
|
| 690 |
-
|
| 691 |
-
except Exception as e:
|
| 692 |
-
print(f"❌ Error processing batch {batch_idx + 1} with Gemini: {e}")
|
| 693 |
-
continue
|
| 694 |
-
|
| 695 |
-
# Return results
|
| 696 |
-
if all_chunks:
|
| 697 |
-
print(f"✅ Gemini successfully processed document with {len(all_chunks)} total chunks")
|
| 698 |
-
return all_chunks, document_markdown
|
| 699 |
-
else:
|
| 700 |
-
print("❌ Gemini processing failed for all batches")
|
| 701 |
-
return None, document_markdown
|
| 702 |
-
|
| 703 |
-
except Exception as e:
|
| 704 |
-
print(f"❌ Gemini chunking error: {e}")
|
| 705 |
-
return None, document_markdown
|
| 706 |
-
|
| 707 |
-
async def auto_chunk_document(document_markdown, client=None):
|
| 708 |
-
"""Auto-chunk a document using LLM with batch processing for large documents"""
|
| 709 |
-
|
| 710 |
-
# Debug: Print document info
|
| 711 |
-
print(f"📄 Document length: {len(document_markdown)} characters")
|
| 712 |
-
|
| 713 |
-
# Get Fireworks API key
|
| 714 |
-
fireworks_api_key = os.environ.get("FIREWORKS_API_KEY")
|
| 715 |
-
if not fireworks_api_key:
|
| 716 |
-
print("⚠️ No Fireworks API key found, falling back to programmatic chunking")
|
| 717 |
-
chunks, original_markdown = programmatic_chunk_document(document_markdown)
|
| 718 |
-
return chunks, original_markdown
|
| 719 |
-
|
| 720 |
-
# Split document into batches if it's too large
|
| 721 |
-
batches = split_document_into_batches(document_markdown, max_chars=8000)
|
| 722 |
-
print(f"📄 Split document into {len(batches)} batches")
|
| 723 |
-
|
| 724 |
-
all_chunks = []
|
| 725 |
-
|
| 726 |
-
# Process each batch
|
| 727 |
-
for batch_idx, batch in enumerate(batches):
|
| 728 |
-
print(f"\n🔄 Processing batch {batch_idx + 1}/{len(batches)} ({len(batch)} chars)")
|
| 729 |
-
|
| 730 |
-
# Try structured output with retry logic for this batch
|
| 731 |
-
max_retries = 3
|
| 732 |
-
batch_chunks = None
|
| 733 |
-
|
| 734 |
-
for attempt in range(max_retries):
|
| 735 |
-
try:
|
| 736 |
-
print(f"🚀 Batch {batch_idx + 1} Attempt {attempt + 1}/{max_retries}: Calling Fireworks...")
|
| 737 |
-
|
| 738 |
-
# Initialize LLM
|
| 739 |
-
llm = init_chat_model(
|
| 740 |
-
"accounts/fireworks/models/llama4-maverick-instruct-basic",
|
| 741 |
-
model_provider="fireworks",
|
| 742 |
-
api_key=fireworks_api_key
|
| 743 |
-
)
|
| 744 |
-
|
| 745 |
-
# Use regular LLM and manual JSON parsing
|
| 746 |
-
prompt = f"""You are an educational content analyzer. Break this document section into logical learning chunks.
|
| 747 |
-
|
| 748 |
-
IMPORTANT: Return your response as a valid JSON object with this exact structure:
|
| 749 |
-
{{
|
| 750 |
-
"chunks": [
|
| 751 |
-
{{
|
| 752 |
-
"topic": "Brief topic name",
|
| 753 |
-
"text": "Complete chunk text with exact formatting"
|
| 754 |
-
}}
|
| 755 |
-
]
|
| 756 |
-
}}
|
| 757 |
-
|
| 758 |
-
Rules for chunking:
|
| 759 |
-
- Each chunk should contain 2-3 related educational concepts
|
| 760 |
-
- Keep chunks concise: 100-300 words (avoid very long text blocks)
|
| 761 |
-
- Preserve all markdown/LaTeX formatting exactly as written
|
| 762 |
-
- Skip: abstracts, acknowledgements, references, author information, page numbers
|
| 763 |
-
- Create separate chunks for figures/tables with their captions
|
| 764 |
-
- Never split mathematical expressions or LaTeX formulas
|
| 765 |
-
- Process ALL content in this section - don't skip any educational material
|
| 766 |
-
- Ensure all JSON strings are properly formatted (no unescaped quotes)
|
| 767 |
-
|
| 768 |
-
Document section to analyze:
|
| 769 |
-
{batch}
|
| 770 |
-
|
| 771 |
-
Return only the JSON object, no other text."""
|
| 772 |
-
|
| 773 |
-
# Call regular LLM
|
| 774 |
-
result = llm.invoke(prompt)
|
| 775 |
-
print(f"📋 Raw LLM response type: {type(result)}")
|
| 776 |
-
|
| 777 |
-
# Extract text content
|
| 778 |
-
if hasattr(result, 'content'):
|
| 779 |
-
response_text = result.content
|
| 780 |
-
elif hasattr(result, 'text'):
|
| 781 |
-
response_text = result.text
|
| 782 |
-
else:
|
| 783 |
-
response_text = str(result)
|
| 784 |
-
|
| 785 |
-
print(f"📋 Response text preview: {response_text}...")
|
| 786 |
-
|
| 787 |
-
# Try to parse JSON manually
|
| 788 |
-
|
| 789 |
-
try:
|
| 790 |
-
# Clean up the response - remove any markdown code blocks and fix common issues
|
| 791 |
-
clean_response = response_text.strip()
|
| 792 |
-
if clean_response.startswith('```json'):
|
| 793 |
-
clean_response = clean_response[7:]
|
| 794 |
-
if clean_response.endswith('```'):
|
| 795 |
-
clean_response = clean_response[:-3]
|
| 796 |
-
clean_response = clean_response.strip()
|
| 797 |
-
|
| 798 |
-
# Fix common JSON truncation issues
|
| 799 |
-
# If the response doesn't end properly, try to close it
|
| 800 |
-
if not clean_response.endswith('}'):
|
| 801 |
-
# Try to find the last complete chunk entry and close properly
|
| 802 |
-
last_brace = clean_response.rfind('}')
|
| 803 |
-
if last_brace != -1:
|
| 804 |
-
# Find if we're inside a chunks array
|
| 805 |
-
chunks_start = clean_response.find('"chunks": [')
|
| 806 |
-
if chunks_start != -1 and last_brace > chunks_start:
|
| 807 |
-
# Close the chunks array and main object
|
| 808 |
-
clean_response = clean_response[:last_brace+1] + '\n ]\n}'
|
| 809 |
-
else:
|
| 810 |
-
clean_response = clean_response[:last_brace+1]
|
| 811 |
-
|
| 812 |
-
print(f"📋 Cleaned response preview: {clean_response[:300]}...")
|
| 813 |
-
print(f"📋 Cleaned response ends with: '{clean_response[-50:]}'")
|
| 814 |
-
|
| 815 |
-
# Additional safety: ensure we have a complete JSON structure
|
| 816 |
-
if not (clean_response.startswith('{') and clean_response.endswith('}')):
|
| 817 |
-
print(f"❌ Response doesn't look like valid JSON structure")
|
| 818 |
-
continue
|
| 819 |
-
|
| 820 |
-
# Fix common JSON escape issues with LaTeX
|
| 821 |
-
# Replace single backslashes with double backslashes in JSON strings
|
| 822 |
-
# But be careful not to affect already-escaped sequences
|
| 823 |
-
def fix_latex_escapes(text):
|
| 824 |
-
# Find all JSON string values (between quotes)
|
| 825 |
-
def escape_in_string(match):
|
| 826 |
-
string_content = match.group(1)
|
| 827 |
-
# Escape single backslashes in LaTeX commands
|
| 828 |
-
# Handle \mathrm, \left, \%, etc. but preserve JSON escapes like \n, \t, \", \\
|
| 829 |
-
# Pattern: backslash followed by letters OR specific LaTeX symbols like %
|
| 830 |
-
fixed = re.sub(r'(?<!\\)\\(?=[a-zA-Z%])', r'\\\\', string_content)
|
| 831 |
-
return f'"{fixed}"'
|
| 832 |
-
|
| 833 |
-
# Apply to all JSON string values
|
| 834 |
-
return re.sub(r'"([^"\\]*(\\.[^"\\]*)*)"', escape_in_string, text)
|
| 835 |
-
|
| 836 |
-
clean_response = fix_latex_escapes(clean_response)
|
| 837 |
-
print(f"📋 After escape fixing: {clean_response[:200]}...")
|
| 838 |
-
|
| 839 |
-
# Parse JSON
|
| 840 |
-
json_data = json.loads(clean_response)
|
| 841 |
-
print(f"📋 Successfully parsed JSON: {type(json_data)}")
|
| 842 |
-
|
| 843 |
-
# Validate with Pydantic
|
| 844 |
-
chunk_response = ChunkList.model_validate(json_data)
|
| 845 |
-
print(f"📋 Pydantic validation successful: {type(chunk_response)}")
|
| 846 |
-
|
| 847 |
-
# Fix literal \n strings in chunk text (convert to actual newlines)
|
| 848 |
-
for chunk in chunk_response.chunks:
|
| 849 |
-
if hasattr(chunk, 'text') and chunk.text:
|
| 850 |
-
# Replace literal \n with actual newlines for paragraph breaks
|
| 851 |
-
# Be careful not to affect LaTeX commands that might contain 'n'
|
| 852 |
-
chunk.text = chunk.text.replace('\\n', '\n')
|
| 853 |
-
|
| 854 |
-
except json.JSONDecodeError as e:
|
| 855 |
-
print(f"❌ Attempt {attempt + 1}: JSON parsing failed: {e}")
|
| 856 |
-
print(f"❌ Response was: {response_text}")
|
| 857 |
-
continue
|
| 858 |
-
except Exception as e:
|
| 859 |
-
print(f"❌ Attempt {attempt + 1}: Pydantic validation failed: {e}")
|
| 860 |
-
continue
|
| 861 |
-
|
| 862 |
-
chunks = chunk_response.chunks
|
| 863 |
-
if not chunks or len(chunks) == 0:
|
| 864 |
-
print(f"⚠️ Attempt {attempt + 1}: No chunks returned")
|
| 865 |
-
continue
|
| 866 |
-
|
| 867 |
-
# Success! Process chunks
|
| 868 |
-
processed_chunks = []
|
| 869 |
-
for i, chunk in enumerate(chunks):
|
| 870 |
-
print(f"\n📝 Processing chunk {i+1}: {chunk.topic}")
|
| 871 |
-
|
| 872 |
-
if not hasattr(chunk, 'text') or not chunk.text.strip():
|
| 873 |
-
print(f"❌ Chunk missing or empty text: {chunk}")
|
| 874 |
-
continue
|
| 875 |
-
|
| 876 |
-
print(f" Text preview: '{chunk.text[:100]}...'")
|
| 877 |
-
|
| 878 |
-
processed_chunks.append({
|
| 879 |
-
"topic": chunk.topic,
|
| 880 |
-
"text": chunk.text,
|
| 881 |
-
"chunk_index": i
|
| 882 |
-
})
|
| 883 |
-
|
| 884 |
-
if processed_chunks:
|
| 885 |
-
print(f"✅ Successfully processed {len(processed_chunks)} chunks for batch {batch_idx + 1}")
|
| 886 |
-
batch_chunks = processed_chunks
|
| 887 |
-
break
|
| 888 |
-
else:
|
| 889 |
-
print(f"❌ Batch {batch_idx + 1} Attempt {attempt + 1}: No valid chunks processed")
|
| 890 |
-
continue
|
| 891 |
-
|
| 892 |
-
except Exception as e:
|
| 893 |
-
print(f"❌ Batch {batch_idx + 1} Attempt {attempt + 1} failed: {e}")
|
| 894 |
-
if attempt == max_retries - 1:
|
| 895 |
-
print(f"❌ All {max_retries} attempts failed for batch {batch_idx + 1}")
|
| 896 |
-
|
| 897 |
-
# Add successful batch chunks to all_chunks
|
| 898 |
-
if batch_chunks:
|
| 899 |
-
all_chunks.extend(batch_chunks)
|
| 900 |
-
print(f"📊 Total chunks so far: {len(all_chunks)}")
|
| 901 |
-
else:
|
| 902 |
-
print(f"⚠️ Batch {batch_idx + 1} failed completely, skipping...")
|
| 903 |
-
|
| 904 |
-
# Final results
|
| 905 |
-
if all_chunks:
|
| 906 |
-
print(f"✅ Successfully processed document with {len(all_chunks)} total chunks from {len(batches)} batches")
|
| 907 |
-
# Re-index all chunks sequentially
|
| 908 |
-
for i, chunk in enumerate(all_chunks):
|
| 909 |
-
chunk["chunk_index"] = i
|
| 910 |
-
return all_chunks, document_markdown
|
| 911 |
-
else:
|
| 912 |
-
print("🔄 All batches failed, falling back to programmatic chunking...")
|
| 913 |
-
chunks, original_markdown = programmatic_chunk_document(document_markdown)
|
| 914 |
-
return chunks, original_markdown
|
| 915 |
-
|
| 916 |
-
try:
|
| 917 |
-
# Initialize Fireworks LLM with structured output
|
| 918 |
-
llm = init_chat_model(
|
| 919 |
-
"accounts/fireworks/models/llama4-maverick-instruct-basic",
|
| 920 |
-
model_provider="fireworks",
|
| 921 |
-
api_key=fireworks_api_key
|
| 922 |
-
)
|
| 923 |
-
|
| 924 |
-
# Create structured LLM that returns ChunkList object
|
| 925 |
-
structured_llm = llm.with_structured_output(ChunkList)
|
| 926 |
-
|
| 927 |
-
# Create improved chunking prompt that returns complete chunk text
|
| 928 |
-
prompt = f"""## Task
|
| 929 |
-
Analyze this academic document and create logical educational chunks. Each chunk should contain 2-3 related educational concepts or lessons that a student would naturally learn together.
|
| 930 |
-
|
| 931 |
-
## Step-by-Step Process
|
| 932 |
-
1. **Scan the document** to identify main topics and educational concepts
|
| 933 |
-
2. **Group related paragraphs** that teach connected ideas (even if separated by figures)
|
| 934 |
-
3. **Create separate chunks** for figures/tables with their captions
|
| 935 |
-
4. **Ensure each chunk** contains 2-3 educational lessons that build on each other
|
| 936 |
-
5. **Preserve all formatting** exactly as written
|
| 937 |
-
|
| 938 |
-
## Chunking Rules
|
| 939 |
-
|
| 940 |
-
### Content Rules
|
| 941 |
-
- **Combine related content**: If a concept is split by a figure placement, reunite the related paragraphs in one chunk
|
| 942 |
-
- **2-3 educational lessons per chunk**: Each chunk should teach 2-3 connected concepts that logically belong together
|
| 943 |
-
- **Preserve complete thoughts**: Never split sentences, mathematical expressions, or LaTeX formulas
|
| 944 |
-
- **Skip metadata sections**: Exclude abstracts, acknowledgments, references, author info, page numbers
|
| 945 |
-
|
| 946 |
-
### Formatting Rules
|
| 947 |
-
- **Preserve exactly**: All markdown, LaTeX, mathematical notation, and formatting
|
| 948 |
-
- **Include paragraph breaks**: Maintain original \\n\\n paragraph separations
|
| 949 |
-
- **Remove artifacts**: Strip page numbers, headers, footers, and formatting metadata
|
| 950 |
-
|
| 951 |
-
### Special Elements
|
| 952 |
-
- **Figures/Tables/Images**: Create separate chunks containing the full caption and any accompanying text
|
| 953 |
-
- **Mathematical expressions**: Keep complete formulas together, never split LaTeX
|
| 954 |
-
- **Code blocks**: Preserve in their entirety with proper formatting
|
| 955 |
-
|
| 956 |
-
## Output Format
|
| 957 |
-
Return a JSON object with this exact schema:
|
| 958 |
-
|
| 959 |
-
```json
|
| 960 |
-
{{
|
| 961 |
-
"chunks": [
|
| 962 |
-
{{
|
| 963 |
-
"topic": "Brief descriptive name (2-6 words) for the educational content",
|
| 964 |
-
"text": "Complete chunk text with exact markdown/LaTeX formatting preserved"
|
| 965 |
-
}}
|
| 966 |
-
]
|
| 967 |
-
}}
|
| 968 |
-
```
|
| 969 |
-
|
| 970 |
-
## Quality Criteria
|
| 971 |
-
**Good chunks:**
|
| 972 |
-
- Contain 2-3 related educational concepts
|
| 973 |
-
- Are 150-500 words (optimal learning unit size)
|
| 974 |
-
- Have clear educational value and logical flow
|
| 975 |
-
- Preserve all original formatting perfectly
|
| 976 |
-
|
| 977 |
-
**Avoid:**
|
| 978 |
-
- Single-sentence chunks
|
| 979 |
-
- Chunks with >5 unrelated concepts
|
| 980 |
-
- Split mathematical expressions
|
| 981 |
-
- Metadata or reference content
|
| 982 |
-
|
| 983 |
-
## Examples
|
| 984 |
-
|
| 985 |
-
**Good chunk example:**
|
| 986 |
-
```json
|
| 987 |
-
{{
|
| 988 |
-
"chunks": [
|
| 989 |
-
{{
|
| 990 |
-
"topic": "Gradient Descent Fundamentals",
|
| 991 |
-
"text": "## Gradient Descent Algorithm\\n\\nGradient descent is an optimization algorithm used to minimize functions...\\n\\n### Mathematical Formulation\\n\\nThe update rule is given by:\\n\\n$\\theta_{{t+1}} = \\theta_t - \\alpha \\nabla f(\\theta_t)$\\n\\nwhere $\\alpha$ is the learning rate..."
|
| 992 |
-
}}
|
| 993 |
-
]
|
| 994 |
-
}}
|
| 995 |
-
```
|
| 996 |
-
|
| 997 |
-
**Bad chunk example:**
|
| 998 |
-
```json
|
| 999 |
-
{{
|
| 1000 |
-
"chunks": [
|
| 1001 |
-
{{
|
| 1002 |
-
"topic": "Introduction",
|
| 1003 |
-
"text": "This paper presents..."
|
| 1004 |
-
}}
|
| 1005 |
-
]
|
| 1006 |
-
}}
|
| 1007 |
-
```
|
| 1008 |
-
(Too brief, not educational content)
|
| 1009 |
-
|
| 1010 |
-
---
|
| 1011 |
-
|
| 1012 |
-
## Document to Process:
|
| 1013 |
-
{document_markdown}
|
| 1014 |
-
|
| 1015 |
-
Please analyze the document and return the JSON object with chunks following the above guidelines.
|
| 1016 |
-
"""
|
| 1017 |
-
|
| 1018 |
-
# Call Fireworks with structured output
|
| 1019 |
-
print("🚀 Calling Fireworks for document chunking...")
|
| 1020 |
-
try:
|
| 1021 |
-
chunk_response = structured_llm.invoke(prompt)
|
| 1022 |
-
print(f"📋 Raw response type: {type(chunk_response)}")
|
| 1023 |
-
print(f"📋 Raw response: {chunk_response}")
|
| 1024 |
-
except Exception as invoke_error:
|
| 1025 |
-
print(f"❌ Error during Fireworks invoke: {invoke_error}")
|
| 1026 |
-
return [], document_markdown
|
| 1027 |
-
|
| 1028 |
-
if chunk_response is None:
|
| 1029 |
-
print("❌ Received None response from Fireworks")
|
| 1030 |
-
return [], document_markdown
|
| 1031 |
-
|
| 1032 |
-
if not hasattr(chunk_response, 'chunks'):
|
| 1033 |
-
print(f"❌ Response missing 'chunks' attribute: {type(chunk_response)}")
|
| 1034 |
-
print(f"Response content: {chunk_response}")
|
| 1035 |
-
return [], document_markdown
|
| 1036 |
-
|
| 1037 |
-
chunks = chunk_response.chunks
|
| 1038 |
-
if not chunks:
|
| 1039 |
-
print("⚠️ No chunks returned from Fireworks")
|
| 1040 |
-
return [], document_markdown
|
| 1041 |
-
|
| 1042 |
-
# Process chunks with direct text (no fuzzy matching needed)
|
| 1043 |
-
processed_chunks = []
|
| 1044 |
-
for i, chunk in enumerate(chunks):
|
| 1045 |
-
print(f"\n📝 Processing chunk {i+1}: {chunk.topic}")
|
| 1046 |
-
|
| 1047 |
-
# Check if chunk has the expected 'text' attribute
|
| 1048 |
-
if not hasattr(chunk, 'text'):
|
| 1049 |
-
print(f"❌ Chunk missing 'text' attribute: {chunk}")
|
| 1050 |
-
continue
|
| 1051 |
-
|
| 1052 |
-
print(f" Text preview: '{chunk.text[:100]}...'")
|
| 1053 |
-
|
| 1054 |
-
processed_chunks.append({
|
| 1055 |
-
"topic": chunk.topic,
|
| 1056 |
-
"text": chunk.text,
|
| 1057 |
-
"chunk_index": i
|
| 1058 |
-
})
|
| 1059 |
-
|
| 1060 |
-
print(f"📊 Processed {len(processed_chunks)} chunks with direct text")
|
| 1061 |
-
|
| 1062 |
-
return processed_chunks, document_markdown
|
| 1063 |
-
|
| 1064 |
-
except Exception as e:
|
| 1065 |
-
import traceback
|
| 1066 |
-
print(f"❌ Auto-chunking error: {e}")
|
| 1067 |
-
print(f"❌ Full traceback: {traceback.format_exc()}")
|
| 1068 |
-
return [], document_markdown
|
| 1069 |
-
|
| 1070 |
-
@app.post("/chunk_page")
|
| 1071 |
-
async def chunk_page(request: dict):
|
| 1072 |
-
"""Analyze a page and suggest chunks for lessons using Fireworks AI with structured output"""
|
| 1073 |
-
print(f"🧠 Chunking page...")
|
| 1074 |
-
|
| 1075 |
-
page_markdown = request.get("markdown", "")
|
| 1076 |
-
if not page_markdown:
|
| 1077 |
-
raise HTTPException(status_code=400, detail="No markdown provided")
|
| 1078 |
-
|
| 1079 |
-
# Get Fireworks API key
|
| 1080 |
-
fireworks_api_key = os.environ.get("FIREWORKS_API_KEY")
|
| 1081 |
-
if not fireworks_api_key:
|
| 1082 |
-
raise HTTPException(status_code=500, detail="FIREWORKS_API_KEY not set")
|
| 1083 |
-
|
| 1084 |
-
try:
|
| 1085 |
-
# Initialize Fireworks LLM with structured output
|
| 1086 |
-
llm = init_chat_model(
|
| 1087 |
-
"accounts/fireworks/models/llama4-maverick-instruct-basic",
|
| 1088 |
-
model_provider="fireworks",
|
| 1089 |
-
api_key=fireworks_api_key
|
| 1090 |
-
)
|
| 1091 |
-
|
| 1092 |
-
# Create structured LLM that returns ChunkList object
|
| 1093 |
-
structured_llm = llm.with_structured_output(ChunkList)
|
| 1094 |
-
|
| 1095 |
-
# Create chunking prompt
|
| 1096 |
-
prompt = f"""Analyze this academic document page and identify chunks suitable for creating interactive lessons.
|
| 1097 |
-
|
| 1098 |
-
DOCUMENT PAGE:
|
| 1099 |
-
{page_markdown}
|
| 1100 |
-
|
| 1101 |
-
Rules:
|
| 1102 |
-
1. Each chunk should contain 2-3 valuable lessons
|
| 1103 |
-
2. start_phrase and end_phrase should be 5-15 words long
|
| 1104 |
-
3. Focus on educational content (concepts, examples, key points)
|
| 1105 |
-
4. More dense content should have more chunks, less dense content fewer chunks
|
| 1106 |
-
5. Identify chunks that would make good interactive lessons
|
| 1107 |
-
6. SKIP chunks from abstract, references, author information, page numbers, etc.
|
| 1108 |
-
|
| 1109 |
-
Return a list of chunks with topic, start_phrase, and end_phrase for each."""
|
| 1110 |
-
|
| 1111 |
-
# Call Fireworks with structured output
|
| 1112 |
-
print("🚀 Calling Fireworks for chunking...")
|
| 1113 |
-
chunk_response = structured_llm.invoke(prompt)
|
| 1114 |
-
chunks = chunk_response.chunks
|
| 1115 |
-
print(f"📝 Received {len(chunks)} chunks from Fireworks")
|
| 1116 |
-
|
| 1117 |
-
# Process chunks with direct text (no fuzzy matching needed)
|
| 1118 |
-
processed_chunks = []
|
| 1119 |
-
for i, chunk in enumerate(chunks):
|
| 1120 |
-
processed_chunks.append({
|
| 1121 |
-
"topic": chunk.topic,
|
| 1122 |
-
"text": chunk.text,
|
| 1123 |
-
"chunk_index": i
|
| 1124 |
-
})
|
| 1125 |
-
print(f"✅ Processed chunk: {chunk.topic}")
|
| 1126 |
-
|
| 1127 |
-
print(f"📊 Successfully processed {len(processed_chunks)} chunks")
|
| 1128 |
-
|
| 1129 |
-
return {
|
| 1130 |
-
"chunks": processed_chunks,
|
| 1131 |
-
"total_found": len(processed_chunks),
|
| 1132 |
-
"total_suggested": len(chunks)
|
| 1133 |
-
}
|
| 1134 |
-
|
| 1135 |
-
except Exception as e:
|
| 1136 |
-
import traceback
|
| 1137 |
-
print(f"❌ Error chunking page: {e}")
|
| 1138 |
-
print(f"❌ Full traceback: {traceback.format_exc()}")
|
| 1139 |
-
raise HTTPException(status_code=500, detail=f"Error chunking page: {str(e)}")
|
| 1140 |
-
|
| 1141 |
-
@app.post("/start_chunk_lesson/{file_id}/{chunk_index}")
|
| 1142 |
-
async def start_chunk_lesson(file_id: str, chunk_index: int, request: dict):
|
| 1143 |
-
"""Start a Socratic teaching session for a specific chunk using Claude"""
|
| 1144 |
-
print(f"🎓 Starting lesson for chunk {chunk_index} in file {file_id}")
|
| 1145 |
-
|
| 1146 |
-
# Get Anthropic API key
|
| 1147 |
-
anthropic_api_key = os.environ.get("ANTHROPIC_API_KEY")
|
| 1148 |
-
if not anthropic_api_key:
|
| 1149 |
-
raise HTTPException(status_code=500, detail="ANTHROPIC_API_KEY not set")
|
| 1150 |
-
|
| 1151 |
-
try:
|
| 1152 |
-
# Extract data from request
|
| 1153 |
-
chunk_data = request.get("chunk", {})
|
| 1154 |
-
document_markdown = request.get("document_markdown", "")
|
| 1155 |
-
|
| 1156 |
-
if not chunk_data or not document_markdown:
|
| 1157 |
-
raise HTTPException(status_code=400, detail="Missing chunk data or document markdown")
|
| 1158 |
-
|
| 1159 |
-
# Get the specific chunk text for focus
|
| 1160 |
-
start_pos = chunk_data.get("start_position")
|
| 1161 |
-
end_pos = chunk_data.get("end_position")
|
| 1162 |
-
|
| 1163 |
-
if start_pos is not None and end_pos is not None:
|
| 1164 |
-
chunk_text = document_markdown[start_pos:end_pos]
|
| 1165 |
-
print(f"📍 Extracted chunk text: {chunk_text[:100]}...")
|
| 1166 |
-
else:
|
| 1167 |
-
chunk_text = f"Focus area: {chunk_data.get('topic', 'Selected content')}"
|
| 1168 |
-
print("⚠️ No positions found, using topic as fallback")
|
| 1169 |
-
|
| 1170 |
-
# Initialize Anthropic client
|
| 1171 |
-
client = anthropic.Anthropic(api_key=anthropic_api_key)
|
| 1172 |
-
|
| 1173 |
-
# Create the system prompt for Socratic teaching
|
| 1174 |
-
system_prompt = """You are a Socratic teacher. Your goal is to help students deeply understand concepts through guided questioning, not by giving direct answers.
|
| 1175 |
-
|
| 1176 |
-
Your teaching approach:
|
| 1177 |
-
1. Ask thoughtful questions that guide discovery
|
| 1178 |
-
2. Be encouraging and patient
|
| 1179 |
-
3. Help students think critically about the material
|
| 1180 |
-
4. Don't give direct answers - guide them to find answers themselves
|
| 1181 |
-
5. Start with 2-3 opening questions to check understanding and spark curiosity
|
| 1182 |
-
|
| 1183 |
-
Focus on the specific chunk highlighted, but use the full document context to create meaningful questions."""
|
| 1184 |
-
|
| 1185 |
-
# Create the user prompt with FULL document + focused chunk
|
| 1186 |
-
user_prompt = f"""Here's a complete educational document, with a specific section that the student wants to focus on:
|
| 1187 |
-
|
| 1188 |
-
FULL DOCUMENT:
|
| 1189 |
-
{document_markdown}
|
| 1190 |
-
|
| 1191 |
-
FOCUSED SECTION:
|
| 1192 |
-
{chunk_text}
|
| 1193 |
-
|
| 1194 |
-
The student has selected the "FOCUSED SECTION" to study. Please create 2-3 Socratic questions that will help them deeply understand this specific section, while drawing on the broader document context when helpful.
|
| 1195 |
-
|
| 1196 |
-
Make the questions specific and thought-provoking to encourage critical thinking about the focused content."""
|
| 1197 |
-
|
| 1198 |
-
# Call Claude
|
| 1199 |
-
print("🤖 Calling Claude for Socratic questions...")
|
| 1200 |
-
response = client.messages.create(
|
| 1201 |
-
model="claude-sonnet-4-20250514",
|
| 1202 |
-
max_tokens=1000,
|
| 1203 |
-
system=system_prompt,
|
| 1204 |
-
messages=[
|
| 1205 |
-
{"role": "user", "content": user_prompt}
|
| 1206 |
-
]
|
| 1207 |
-
)
|
| 1208 |
-
|
| 1209 |
-
# Extract the response text
|
| 1210 |
-
questions_text = response.content[0].text
|
| 1211 |
-
print(f"✅ Received Socratic questions from Claude")
|
| 1212 |
|
| 1213 |
return {
|
| 1214 |
-
"
|
| 1215 |
-
"
|
| 1216 |
-
"
|
|
|
|
|
|
|
| 1217 |
}
|
| 1218 |
|
| 1219 |
except Exception as e:
|
| 1220 |
-
|
| 1221 |
-
|
| 1222 |
-
print(f"❌ Full traceback: {traceback.format_exc()}")
|
| 1223 |
-
raise HTTPException(status_code=500, detail=f"Error starting lesson: {str(e)}")
|
| 1224 |
|
| 1225 |
# Mount static files for production deployment
|
| 1226 |
frontend_path = os.path.join(os.path.dirname(__file__), "..", "frontend")
|
|
|
|
| 2 |
from fastapi.middleware.cors import CORSMiddleware
|
| 3 |
from fastapi.staticfiles import StaticFiles
|
| 4 |
from fastapi.responses import FileResponse
|
|
|
|
| 5 |
import os
|
| 6 |
import tempfile
|
|
|
|
|
|
|
|
|
|
| 7 |
from dotenv import load_dotenv
|
| 8 |
+
from pydantic import BaseModel
|
|
|
|
| 9 |
from typing import Optional, List
|
|
|
|
| 10 |
import anthropic
|
| 11 |
+
|
|
|
|
| 12 |
# Load environment variables
|
| 13 |
load_dotenv()
|
| 14 |
|
|
|
|
| 100 |
|
| 101 |
@app.post("/upload_pdf")
|
| 102 |
async def upload_pdf(file: UploadFile = File(...)):
|
| 103 |
+
"""Simple PDF upload endpoint that saves the file locally"""
|
| 104 |
+
print(f"📄 Uploading file: {file.filename}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
|
| 106 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
# Read PDF bytes
|
| 108 |
file_bytes = await file.read()
|
| 109 |
print(f"📊 File size: {len(file_bytes)} bytes")
|
| 110 |
|
| 111 |
+
# Create temporary file to save PDF
|
| 112 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
|
| 113 |
temp_file.write(file_bytes)
|
| 114 |
temp_file_path = temp_file.name
|
| 115 |
|
| 116 |
+
print(f"✅ PDF saved to: {temp_file_path}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
|
| 118 |
return {
|
| 119 |
+
"message": "PDF uploaded successfully!",
|
| 120 |
+
"file_path": temp_file_path,
|
| 121 |
+
"filename": file.filename,
|
| 122 |
+
"status": "uploaded",
|
| 123 |
+
"size": len(file_bytes)
|
| 124 |
}
|
| 125 |
|
| 126 |
except Exception as e:
|
| 127 |
+
print(f"❌ Error uploading PDF: {e}")
|
| 128 |
+
raise HTTPException(status_code=500, detail=f"PDF upload error: {str(e)}")
|
|
|
|
|
|
|
| 129 |
|
| 130 |
# Mount static files for production deployment
|
| 131 |
frontend_path = os.path.join(os.path.dirname(__file__), "..", "frontend")
|
backend/requirements.txt
CHANGED
|
@@ -1,13 +1,6 @@
|
|
| 1 |
uvicorn[standard]
|
| 2 |
fastapi==0.115.7
|
| 3 |
python-multipart>=0.0.5
|
| 4 |
-
mistralai
|
| 5 |
python-dotenv
|
| 6 |
-
fireworks-ai
|
| 7 |
-
langchain[fireworks]
|
| 8 |
-
langchain
|
| 9 |
-
langchain-core
|
| 10 |
-
langchain-fireworks
|
| 11 |
pydantic
|
| 12 |
-
anthropic
|
| 13 |
-
google-genai
|
|
|
|
| 1 |
uvicorn[standard]
|
| 2 |
fastapi==0.115.7
|
| 3 |
python-multipart>=0.0.5
|
|
|
|
| 4 |
python-dotenv
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
pydantic
|
| 6 |
+
anthropic
|
|
|
frontend/components.json
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"$schema": "https://ui.shadcn.com/schema.json",
|
| 3 |
+
"style": "new-york",
|
| 4 |
+
"rsc": false,
|
| 5 |
+
"tsx": false,
|
| 6 |
+
"tailwind": {
|
| 7 |
+
"config": "tailwind.config.js",
|
| 8 |
+
"css": "src/index.css",
|
| 9 |
+
"baseColor": "neutral",
|
| 10 |
+
"cssVariables": true,
|
| 11 |
+
"prefix": ""
|
| 12 |
+
},
|
| 13 |
+
"aliases": {
|
| 14 |
+
"components": "@/components",
|
| 15 |
+
"utils": "@/lib/utils",
|
| 16 |
+
"ui": "@/components/ui",
|
| 17 |
+
"lib": "@/lib",
|
| 18 |
+
"hooks": "@/hooks"
|
| 19 |
+
},
|
| 20 |
+
"iconLibrary": "lucide"
|
| 21 |
+
}
|
frontend/jsconfig.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"compilerOptions": {
|
| 3 |
+
"baseUrl": ".",
|
| 4 |
+
"paths": {
|
| 5 |
+
"@/*": ["./src/*"]
|
| 6 |
+
}
|
| 7 |
+
},
|
| 8 |
+
"include": ["src"]
|
| 9 |
+
}
|
frontend/package-lock.json
CHANGED
|
@@ -8,11 +8,16 @@
|
|
| 8 |
"name": "frontend",
|
| 9 |
"version": "0.0.0",
|
| 10 |
"dependencies": {
|
|
|
|
| 11 |
"@llamaindex/chat-ui": "^0.5.17",
|
| 12 |
"@swc/helpers": "^0.5.17",
|
| 13 |
"@tailwindcss/postcss": "^4.1.11",
|
|
|
|
| 14 |
"autoprefixer": "^10.4.21",
|
|
|
|
|
|
|
| 15 |
"katex": "^0.16.22",
|
|
|
|
| 16 |
"postcss": "^8.5.6",
|
| 17 |
"react": "^18.3.1",
|
| 18 |
"react-dom": "^18.3.1",
|
|
@@ -23,6 +28,7 @@
|
|
| 23 |
"rehype-katex": "^7.0.1",
|
| 24 |
"rehype-raw": "^7.0.0",
|
| 25 |
"remark-math": "^6.0.0",
|
|
|
|
| 26 |
"tailwindcss": "^4.1.11"
|
| 27 |
},
|
| 28 |
"devDependencies": {
|
|
@@ -34,9 +40,80 @@
|
|
| 34 |
"eslint-plugin-react-hooks": "^5.2.0",
|
| 35 |
"eslint-plugin-react-refresh": "^0.4.20",
|
| 36 |
"globals": "^16.3.0",
|
|
|
|
| 37 |
"vite": "^7.0.4"
|
| 38 |
}
|
| 39 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
"node_modules/@alloc/quick-lru": {
|
| 41 |
"version": "5.2.0",
|
| 42 |
"resolved": "https://registry.npmjs.org/@alloc/quick-lru/-/quick-lru-5.2.0.tgz",
|
|
@@ -3186,6 +3263,15 @@
|
|
| 3186 |
"integrity": "sha512-7NXolsK4CAS5+xvdj5OMMbI962hU/wvwoxk+LWR9Ek9bVtyuuYScDN6eS0rUm6TxApFpw7CX1o4uJzcd4AyD3Q==",
|
| 3187 |
"license": "MIT"
|
| 3188 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3189 |
"node_modules/@llamaindex/chat-ui/node_modules/make-cancellable-promise": {
|
| 3190 |
"version": "1.3.2",
|
| 3191 |
"resolved": "https://registry.npmjs.org/make-cancellable-promise/-/make-cancellable-promise-1.3.2.tgz",
|
|
@@ -4812,6 +4898,16 @@
|
|
| 4812 |
"inline-style-parser": "0.1.1"
|
| 4813 |
}
|
| 4814 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4815 |
"node_modules/@llamaindex/chat-ui/node_modules/unified": {
|
| 4816 |
"version": "10.1.2",
|
| 4817 |
"resolved": "https://registry.npmjs.org/unified/-/unified-10.1.2.tgz",
|
|
@@ -5117,6 +5213,15 @@
|
|
| 5117 |
"integrity": "sha512-CecwLWx3rhxVQF6V4bAgPS5t+So2sTbPgAzafKkVizyi7tlwpcFpdFqq+wqF2OwNBmqFuu6tOyouTuxgpMfzmA==",
|
| 5118 |
"license": "MIT"
|
| 5119 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5120 |
"node_modules/@radix-ui/colors": {
|
| 5121 |
"version": "3.0.0",
|
| 5122 |
"resolved": "https://registry.npmjs.org/@radix-ui/colors/-/colors-3.0.0.tgz",
|
|
@@ -5850,6 +5955,12 @@
|
|
| 5850 |
"win32"
|
| 5851 |
]
|
| 5852 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5853 |
"node_modules/@stitches/core": {
|
| 5854 |
"version": "1.2.8",
|
| 5855 |
"resolved": "https://registry.npmjs.org/@stitches/core/-/core-1.2.8.tgz",
|
|
@@ -6384,6 +6495,24 @@
|
|
| 6384 |
"acorn": "^6.0.0 || ^7.0.0 || ^8.0.0"
|
| 6385 |
}
|
| 6386 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6387 |
"node_modules/ajv": {
|
| 6388 |
"version": "6.12.6",
|
| 6389 |
"resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz",
|
|
@@ -7405,6 +7534,15 @@
|
|
| 7405 |
"es5-ext": "~0.10.14"
|
| 7406 |
}
|
| 7407 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7408 |
"node_modules/expand-template": {
|
| 7409 |
"version": "2.0.3",
|
| 7410 |
"resolved": "https://registry.npmjs.org/expand-template/-/expand-template-2.0.3.tgz",
|
|
@@ -8151,6 +8289,12 @@
|
|
| 8151 |
"dev": true,
|
| 8152 |
"license": "MIT"
|
| 8153 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8154 |
"node_modules/json-schema-traverse": {
|
| 8155 |
"version": "0.4.1",
|
| 8156 |
"resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz",
|
|
@@ -8551,12 +8695,12 @@
|
|
| 8551 |
}
|
| 8552 |
},
|
| 8553 |
"node_modules/lucide-react": {
|
| 8554 |
-
"version": "0.
|
| 8555 |
-
"resolved": "https://registry.npmjs.org/lucide-react/-/lucide-react-0.
|
| 8556 |
-
"integrity": "sha512-
|
| 8557 |
"license": "ISC",
|
| 8558 |
"peerDependencies": {
|
| 8559 |
-
"react": "^16.5.1 || ^17.0.0 || ^18.0.0 || ^19.0.0
|
| 8560 |
}
|
| 8561 |
},
|
| 8562 |
"node_modules/lz-string": {
|
|
@@ -13899,6 +14043,19 @@
|
|
| 13899 |
"node": ">=8"
|
| 13900 |
}
|
| 13901 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13902 |
"node_modules/tabbable": {
|
| 13903 |
"version": "6.2.0",
|
| 13904 |
"resolved": "https://registry.npmjs.org/tabbable/-/tabbable-6.2.0.tgz",
|
|
@@ -13906,9 +14063,9 @@
|
|
| 13906 |
"license": "MIT"
|
| 13907 |
},
|
| 13908 |
"node_modules/tailwind-merge": {
|
| 13909 |
-
"version": "
|
| 13910 |
-
"resolved": "https://registry.npmjs.org/tailwind-merge/-/tailwind-merge-
|
| 13911 |
-
"integrity": "sha512-
|
| 13912 |
"license": "MIT",
|
| 13913 |
"funding": {
|
| 13914 |
"type": "github",
|
|
@@ -13993,6 +14150,18 @@
|
|
| 13993 |
"node": ">=18"
|
| 13994 |
}
|
| 13995 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13996 |
"node_modules/tiny-invariant": {
|
| 13997 |
"version": "1.3.3",
|
| 13998 |
"resolved": "https://registry.npmjs.org/tiny-invariant/-/tiny-invariant-1.3.3.tgz",
|
|
@@ -14127,6 +14296,16 @@
|
|
| 14127 |
"node": "*"
|
| 14128 |
}
|
| 14129 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14130 |
"node_modules/type": {
|
| 14131 |
"version": "2.7.3",
|
| 14132 |
"resolved": "https://registry.npmjs.org/type/-/type-2.7.3.tgz",
|
|
@@ -14376,6 +14555,15 @@
|
|
| 14376 |
}
|
| 14377 |
}
|
| 14378 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14379 |
"node_modules/util-deprecate": {
|
| 14380 |
"version": "1.0.2",
|
| 14381 |
"resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz",
|
|
@@ -14627,6 +14815,25 @@
|
|
| 14627 |
"url": "https://github.com/sponsors/sindresorhus"
|
| 14628 |
}
|
| 14629 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14630 |
"node_modules/zwitch": {
|
| 14631 |
"version": "2.0.4",
|
| 14632 |
"resolved": "https://registry.npmjs.org/zwitch/-/zwitch-2.0.4.tgz",
|
|
|
|
| 8 |
"name": "frontend",
|
| 9 |
"version": "0.0.0",
|
| 10 |
"dependencies": {
|
| 11 |
+
"@ai-sdk/react": "^2.0.11",
|
| 12 |
"@llamaindex/chat-ui": "^0.5.17",
|
| 13 |
"@swc/helpers": "^0.5.17",
|
| 14 |
"@tailwindcss/postcss": "^4.1.11",
|
| 15 |
+
"ai": "^5.0.11",
|
| 16 |
"autoprefixer": "^10.4.21",
|
| 17 |
+
"class-variance-authority": "^0.7.1",
|
| 18 |
+
"clsx": "^2.1.1",
|
| 19 |
"katex": "^0.16.22",
|
| 20 |
+
"lucide-react": "^0.539.0",
|
| 21 |
"postcss": "^8.5.6",
|
| 22 |
"react": "^18.3.1",
|
| 23 |
"react-dom": "^18.3.1",
|
|
|
|
| 28 |
"rehype-katex": "^7.0.1",
|
| 29 |
"rehype-raw": "^7.0.0",
|
| 30 |
"remark-math": "^6.0.0",
|
| 31 |
+
"tailwind-merge": "^3.3.1",
|
| 32 |
"tailwindcss": "^4.1.11"
|
| 33 |
},
|
| 34 |
"devDependencies": {
|
|
|
|
| 40 |
"eslint-plugin-react-hooks": "^5.2.0",
|
| 41 |
"eslint-plugin-react-refresh": "^0.4.20",
|
| 42 |
"globals": "^16.3.0",
|
| 43 |
+
"tw-animate-css": "^1.3.6",
|
| 44 |
"vite": "^7.0.4"
|
| 45 |
}
|
| 46 |
},
|
| 47 |
+
"node_modules/@ai-sdk/gateway": {
|
| 48 |
+
"version": "1.0.5",
|
| 49 |
+
"resolved": "https://registry.npmjs.org/@ai-sdk/gateway/-/gateway-1.0.5.tgz",
|
| 50 |
+
"integrity": "sha512-GOhxiHm2nfuS618Ia13AWxEIhCsj5+tFaw6sjSO7pvMZT03QgFAJyX4xBYj+3i3mfIvw+yJOvyhVu1fI+pAHQA==",
|
| 51 |
+
"license": "Apache-2.0",
|
| 52 |
+
"dependencies": {
|
| 53 |
+
"@ai-sdk/provider": "2.0.0",
|
| 54 |
+
"@ai-sdk/provider-utils": "3.0.2"
|
| 55 |
+
},
|
| 56 |
+
"engines": {
|
| 57 |
+
"node": ">=18"
|
| 58 |
+
},
|
| 59 |
+
"peerDependencies": {
|
| 60 |
+
"zod": "^3.25.76 || ^4"
|
| 61 |
+
}
|
| 62 |
+
},
|
| 63 |
+
"node_modules/@ai-sdk/provider": {
|
| 64 |
+
"version": "2.0.0",
|
| 65 |
+
"resolved": "https://registry.npmjs.org/@ai-sdk/provider/-/provider-2.0.0.tgz",
|
| 66 |
+
"integrity": "sha512-6o7Y2SeO9vFKB8lArHXehNuusnpddKPk7xqL7T2/b+OvXMRIXUO1rR4wcv1hAFUAT9avGZshty3Wlua/XA7TvA==",
|
| 67 |
+
"license": "Apache-2.0",
|
| 68 |
+
"dependencies": {
|
| 69 |
+
"json-schema": "^0.4.0"
|
| 70 |
+
},
|
| 71 |
+
"engines": {
|
| 72 |
+
"node": ">=18"
|
| 73 |
+
}
|
| 74 |
+
},
|
| 75 |
+
"node_modules/@ai-sdk/provider-utils": {
|
| 76 |
+
"version": "3.0.2",
|
| 77 |
+
"resolved": "https://registry.npmjs.org/@ai-sdk/provider-utils/-/provider-utils-3.0.2.tgz",
|
| 78 |
+
"integrity": "sha512-0a5a6VafkV6+0irdpqnub8WE6qzG2VMsDBpXb9NQIz8c4TG8fI+GSTFIL9sqrLEwXrHdiRj7fwJsrir4jClL0w==",
|
| 79 |
+
"license": "Apache-2.0",
|
| 80 |
+
"dependencies": {
|
| 81 |
+
"@ai-sdk/provider": "2.0.0",
|
| 82 |
+
"@standard-schema/spec": "^1.0.0",
|
| 83 |
+
"eventsource-parser": "^3.0.3",
|
| 84 |
+
"zod-to-json-schema": "^3.24.1"
|
| 85 |
+
},
|
| 86 |
+
"engines": {
|
| 87 |
+
"node": ">=18"
|
| 88 |
+
},
|
| 89 |
+
"peerDependencies": {
|
| 90 |
+
"zod": "^3.25.76 || ^4"
|
| 91 |
+
}
|
| 92 |
+
},
|
| 93 |
+
"node_modules/@ai-sdk/react": {
|
| 94 |
+
"version": "2.0.11",
|
| 95 |
+
"resolved": "https://registry.npmjs.org/@ai-sdk/react/-/react-2.0.11.tgz",
|
| 96 |
+
"integrity": "sha512-XL73e7RSOQjYRCJQ96sDY6TxrMJK9YBgI518E6Jy306BjRwy5XyY94e/DN71TE6VpiwDzxixlymfDK90Ro95Jg==",
|
| 97 |
+
"license": "Apache-2.0",
|
| 98 |
+
"dependencies": {
|
| 99 |
+
"@ai-sdk/provider-utils": "3.0.2",
|
| 100 |
+
"ai": "5.0.11",
|
| 101 |
+
"swr": "^2.2.5",
|
| 102 |
+
"throttleit": "2.1.0"
|
| 103 |
+
},
|
| 104 |
+
"engines": {
|
| 105 |
+
"node": ">=18"
|
| 106 |
+
},
|
| 107 |
+
"peerDependencies": {
|
| 108 |
+
"react": "^18 || ^19 || ^19.0.0-rc",
|
| 109 |
+
"zod": "^3.25.76 || ^4"
|
| 110 |
+
},
|
| 111 |
+
"peerDependenciesMeta": {
|
| 112 |
+
"zod": {
|
| 113 |
+
"optional": true
|
| 114 |
+
}
|
| 115 |
+
}
|
| 116 |
+
},
|
| 117 |
"node_modules/@alloc/quick-lru": {
|
| 118 |
"version": "5.2.0",
|
| 119 |
"resolved": "https://registry.npmjs.org/@alloc/quick-lru/-/quick-lru-5.2.0.tgz",
|
|
|
|
| 3263 |
"integrity": "sha512-7NXolsK4CAS5+xvdj5OMMbI962hU/wvwoxk+LWR9Ek9bVtyuuYScDN6eS0rUm6TxApFpw7CX1o4uJzcd4AyD3Q==",
|
| 3264 |
"license": "MIT"
|
| 3265 |
},
|
| 3266 |
+
"node_modules/@llamaindex/chat-ui/node_modules/lucide-react": {
|
| 3267 |
+
"version": "0.453.0",
|
| 3268 |
+
"resolved": "https://registry.npmjs.org/lucide-react/-/lucide-react-0.453.0.tgz",
|
| 3269 |
+
"integrity": "sha512-kL+RGZCcJi9BvJtzg2kshO192Ddy9hv3ij+cPrVPWSRzgCWCVazoQJxOjAwgK53NomL07HB7GPHW120FimjNhQ==",
|
| 3270 |
+
"license": "ISC",
|
| 3271 |
+
"peerDependencies": {
|
| 3272 |
+
"react": "^16.5.1 || ^17.0.0 || ^18.0.0 || ^19.0.0-rc"
|
| 3273 |
+
}
|
| 3274 |
+
},
|
| 3275 |
"node_modules/@llamaindex/chat-ui/node_modules/make-cancellable-promise": {
|
| 3276 |
"version": "1.3.2",
|
| 3277 |
"resolved": "https://registry.npmjs.org/make-cancellable-promise/-/make-cancellable-promise-1.3.2.tgz",
|
|
|
|
| 4898 |
"inline-style-parser": "0.1.1"
|
| 4899 |
}
|
| 4900 |
},
|
| 4901 |
+
"node_modules/@llamaindex/chat-ui/node_modules/tailwind-merge": {
|
| 4902 |
+
"version": "2.6.0",
|
| 4903 |
+
"resolved": "https://registry.npmjs.org/tailwind-merge/-/tailwind-merge-2.6.0.tgz",
|
| 4904 |
+
"integrity": "sha512-P+Vu1qXfzediirmHOC3xKGAYeZtPcV9g76X+xg2FD4tYgR71ewMA35Y3sCz3zhiN/dwefRpJX0yBcgwi1fXNQA==",
|
| 4905 |
+
"license": "MIT",
|
| 4906 |
+
"funding": {
|
| 4907 |
+
"type": "github",
|
| 4908 |
+
"url": "https://github.com/sponsors/dcastil"
|
| 4909 |
+
}
|
| 4910 |
+
},
|
| 4911 |
"node_modules/@llamaindex/chat-ui/node_modules/unified": {
|
| 4912 |
"version": "10.1.2",
|
| 4913 |
"resolved": "https://registry.npmjs.org/unified/-/unified-10.1.2.tgz",
|
|
|
|
| 5213 |
"integrity": "sha512-CecwLWx3rhxVQF6V4bAgPS5t+So2sTbPgAzafKkVizyi7tlwpcFpdFqq+wqF2OwNBmqFuu6tOyouTuxgpMfzmA==",
|
| 5214 |
"license": "MIT"
|
| 5215 |
},
|
| 5216 |
+
"node_modules/@opentelemetry/api": {
|
| 5217 |
+
"version": "1.9.0",
|
| 5218 |
+
"resolved": "https://registry.npmjs.org/@opentelemetry/api/-/api-1.9.0.tgz",
|
| 5219 |
+
"integrity": "sha512-3giAOQvZiH5F9bMlMiv8+GSPMeqg0dbaeo58/0SlA9sxSqZhnUtxzX9/2FzyhS9sWQf5S0GJE0AKBrFqjpeYcg==",
|
| 5220 |
+
"license": "Apache-2.0",
|
| 5221 |
+
"engines": {
|
| 5222 |
+
"node": ">=8.0.0"
|
| 5223 |
+
}
|
| 5224 |
+
},
|
| 5225 |
"node_modules/@radix-ui/colors": {
|
| 5226 |
"version": "3.0.0",
|
| 5227 |
"resolved": "https://registry.npmjs.org/@radix-ui/colors/-/colors-3.0.0.tgz",
|
|
|
|
| 5955 |
"win32"
|
| 5956 |
]
|
| 5957 |
},
|
| 5958 |
+
"node_modules/@standard-schema/spec": {
|
| 5959 |
+
"version": "1.0.0",
|
| 5960 |
+
"resolved": "https://registry.npmjs.org/@standard-schema/spec/-/spec-1.0.0.tgz",
|
| 5961 |
+
"integrity": "sha512-m2bOd0f2RT9k8QJx1JN85cZYyH1RqFBdlwtkSlf4tBDYLCiiZnv1fIIwacK6cqwXavOydf0NPToMQgpKq+dVlA==",
|
| 5962 |
+
"license": "MIT"
|
| 5963 |
+
},
|
| 5964 |
"node_modules/@stitches/core": {
|
| 5965 |
"version": "1.2.8",
|
| 5966 |
"resolved": "https://registry.npmjs.org/@stitches/core/-/core-1.2.8.tgz",
|
|
|
|
| 6495 |
"acorn": "^6.0.0 || ^7.0.0 || ^8.0.0"
|
| 6496 |
}
|
| 6497 |
},
|
| 6498 |
+
"node_modules/ai": {
|
| 6499 |
+
"version": "5.0.11",
|
| 6500 |
+
"resolved": "https://registry.npmjs.org/ai/-/ai-5.0.11.tgz",
|
| 6501 |
+
"integrity": "sha512-PtiQAnhlWuN3Y2z9PifM/9XIQ0HIoHjZqEu7zHffyGEXiqHLtrJpt4IiGVzUTAKxXM5JCtO9sD/hwGXDp7ZYsw==",
|
| 6502 |
+
"license": "Apache-2.0",
|
| 6503 |
+
"dependencies": {
|
| 6504 |
+
"@ai-sdk/gateway": "1.0.5",
|
| 6505 |
+
"@ai-sdk/provider": "2.0.0",
|
| 6506 |
+
"@ai-sdk/provider-utils": "3.0.2",
|
| 6507 |
+
"@opentelemetry/api": "1.9.0"
|
| 6508 |
+
},
|
| 6509 |
+
"engines": {
|
| 6510 |
+
"node": ">=18"
|
| 6511 |
+
},
|
| 6512 |
+
"peerDependencies": {
|
| 6513 |
+
"zod": "^3.25.76 || ^4"
|
| 6514 |
+
}
|
| 6515 |
+
},
|
| 6516 |
"node_modules/ajv": {
|
| 6517 |
"version": "6.12.6",
|
| 6518 |
"resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz",
|
|
|
|
| 7534 |
"es5-ext": "~0.10.14"
|
| 7535 |
}
|
| 7536 |
},
|
| 7537 |
+
"node_modules/eventsource-parser": {
|
| 7538 |
+
"version": "3.0.3",
|
| 7539 |
+
"resolved": "https://registry.npmjs.org/eventsource-parser/-/eventsource-parser-3.0.3.tgz",
|
| 7540 |
+
"integrity": "sha512-nVpZkTMM9rF6AQ9gPJpFsNAMt48wIzB5TQgiTLdHiuO8XEDhUgZEhqKlZWXbIzo9VmJ/HvysHqEaVeD5v9TPvA==",
|
| 7541 |
+
"license": "MIT",
|
| 7542 |
+
"engines": {
|
| 7543 |
+
"node": ">=20.0.0"
|
| 7544 |
+
}
|
| 7545 |
+
},
|
| 7546 |
"node_modules/expand-template": {
|
| 7547 |
"version": "2.0.3",
|
| 7548 |
"resolved": "https://registry.npmjs.org/expand-template/-/expand-template-2.0.3.tgz",
|
|
|
|
| 8289 |
"dev": true,
|
| 8290 |
"license": "MIT"
|
| 8291 |
},
|
| 8292 |
+
"node_modules/json-schema": {
|
| 8293 |
+
"version": "0.4.0",
|
| 8294 |
+
"resolved": "https://registry.npmjs.org/json-schema/-/json-schema-0.4.0.tgz",
|
| 8295 |
+
"integrity": "sha512-es94M3nTIfsEPisRafak+HDLfHXnKBhV3vU5eqPcS3flIWqcxJWgXHXiey3YrpaNsanY5ei1VoYEbOzijuq9BA==",
|
| 8296 |
+
"license": "(AFL-2.1 OR BSD-3-Clause)"
|
| 8297 |
+
},
|
| 8298 |
"node_modules/json-schema-traverse": {
|
| 8299 |
"version": "0.4.1",
|
| 8300 |
"resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz",
|
|
|
|
| 8695 |
}
|
| 8696 |
},
|
| 8697 |
"node_modules/lucide-react": {
|
| 8698 |
+
"version": "0.539.0",
|
| 8699 |
+
"resolved": "https://registry.npmjs.org/lucide-react/-/lucide-react-0.539.0.tgz",
|
| 8700 |
+
"integrity": "sha512-VVISr+VF2krO91FeuCrm1rSOLACQUYVy7NQkzrOty52Y8TlTPcXcMdQFj9bYzBgXbWCiywlwSZ3Z8u6a+6bMlg==",
|
| 8701 |
"license": "ISC",
|
| 8702 |
"peerDependencies": {
|
| 8703 |
+
"react": "^16.5.1 || ^17.0.0 || ^18.0.0 || ^19.0.0"
|
| 8704 |
}
|
| 8705 |
},
|
| 8706 |
"node_modules/lz-string": {
|
|
|
|
| 14043 |
"node": ">=8"
|
| 14044 |
}
|
| 14045 |
},
|
| 14046 |
+
"node_modules/swr": {
|
| 14047 |
+
"version": "2.3.6",
|
| 14048 |
+
"resolved": "https://registry.npmjs.org/swr/-/swr-2.3.6.tgz",
|
| 14049 |
+
"integrity": "sha512-wfHRmHWk/isGNMwlLGlZX5Gzz/uTgo0o2IRuTMcf4CPuPFJZlq0rDaKUx+ozB5nBOReNV1kiOyzMfj+MBMikLw==",
|
| 14050 |
+
"license": "MIT",
|
| 14051 |
+
"dependencies": {
|
| 14052 |
+
"dequal": "^2.0.3",
|
| 14053 |
+
"use-sync-external-store": "^1.4.0"
|
| 14054 |
+
},
|
| 14055 |
+
"peerDependencies": {
|
| 14056 |
+
"react": "^16.11.0 || ^17.0.0 || ^18.0.0 || ^19.0.0"
|
| 14057 |
+
}
|
| 14058 |
+
},
|
| 14059 |
"node_modules/tabbable": {
|
| 14060 |
"version": "6.2.0",
|
| 14061 |
"resolved": "https://registry.npmjs.org/tabbable/-/tabbable-6.2.0.tgz",
|
|
|
|
| 14063 |
"license": "MIT"
|
| 14064 |
},
|
| 14065 |
"node_modules/tailwind-merge": {
|
| 14066 |
+
"version": "3.3.1",
|
| 14067 |
+
"resolved": "https://registry.npmjs.org/tailwind-merge/-/tailwind-merge-3.3.1.tgz",
|
| 14068 |
+
"integrity": "sha512-gBXpgUm/3rp1lMZZrM/w7D8GKqshif0zAymAhbCyIt8KMe+0v9DQ7cdYLR4FHH/cKpdTXb+A/tKKU3eolfsI+g==",
|
| 14069 |
"license": "MIT",
|
| 14070 |
"funding": {
|
| 14071 |
"type": "github",
|
|
|
|
| 14150 |
"node": ">=18"
|
| 14151 |
}
|
| 14152 |
},
|
| 14153 |
+
"node_modules/throttleit": {
|
| 14154 |
+
"version": "2.1.0",
|
| 14155 |
+
"resolved": "https://registry.npmjs.org/throttleit/-/throttleit-2.1.0.tgz",
|
| 14156 |
+
"integrity": "sha512-nt6AMGKW1p/70DF/hGBdJB57B8Tspmbp5gfJ8ilhLnt7kkr2ye7hzD6NVG8GGErk2HWF34igrL2CXmNIkzKqKw==",
|
| 14157 |
+
"license": "MIT",
|
| 14158 |
+
"engines": {
|
| 14159 |
+
"node": ">=18"
|
| 14160 |
+
},
|
| 14161 |
+
"funding": {
|
| 14162 |
+
"url": "https://github.com/sponsors/sindresorhus"
|
| 14163 |
+
}
|
| 14164 |
+
},
|
| 14165 |
"node_modules/tiny-invariant": {
|
| 14166 |
"version": "1.3.3",
|
| 14167 |
"resolved": "https://registry.npmjs.org/tiny-invariant/-/tiny-invariant-1.3.3.tgz",
|
|
|
|
| 14296 |
"node": "*"
|
| 14297 |
}
|
| 14298 |
},
|
| 14299 |
+
"node_modules/tw-animate-css": {
|
| 14300 |
+
"version": "1.3.6",
|
| 14301 |
+
"resolved": "https://registry.npmjs.org/tw-animate-css/-/tw-animate-css-1.3.6.tgz",
|
| 14302 |
+
"integrity": "sha512-9dy0R9UsYEGmgf26L8UcHiLmSFTHa9+D7+dAt/G/sF5dCnPePZbfgDYinc7/UzAM7g/baVrmS6m9yEpU46d+LA==",
|
| 14303 |
+
"dev": true,
|
| 14304 |
+
"license": "MIT",
|
| 14305 |
+
"funding": {
|
| 14306 |
+
"url": "https://github.com/sponsors/Wombosvideo"
|
| 14307 |
+
}
|
| 14308 |
+
},
|
| 14309 |
"node_modules/type": {
|
| 14310 |
"version": "2.7.3",
|
| 14311 |
"resolved": "https://registry.npmjs.org/type/-/type-2.7.3.tgz",
|
|
|
|
| 14555 |
}
|
| 14556 |
}
|
| 14557 |
},
|
| 14558 |
+
"node_modules/use-sync-external-store": {
|
| 14559 |
+
"version": "1.5.0",
|
| 14560 |
+
"resolved": "https://registry.npmjs.org/use-sync-external-store/-/use-sync-external-store-1.5.0.tgz",
|
| 14561 |
+
"integrity": "sha512-Rb46I4cGGVBmjamjphe8L/UnvJD+uPPtTkNvX5mZgqdbavhI4EbgIWJiIHXJ8bc/i9EQGPRh4DwEURJ552Do0A==",
|
| 14562 |
+
"license": "MIT",
|
| 14563 |
+
"peerDependencies": {
|
| 14564 |
+
"react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0"
|
| 14565 |
+
}
|
| 14566 |
+
},
|
| 14567 |
"node_modules/util-deprecate": {
|
| 14568 |
"version": "1.0.2",
|
| 14569 |
"resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz",
|
|
|
|
| 14815 |
"url": "https://github.com/sponsors/sindresorhus"
|
| 14816 |
}
|
| 14817 |
},
|
| 14818 |
+
"node_modules/zod": {
|
| 14819 |
+
"version": "3.25.76",
|
| 14820 |
+
"resolved": "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz",
|
| 14821 |
+
"integrity": "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==",
|
| 14822 |
+
"license": "MIT",
|
| 14823 |
+
"peer": true,
|
| 14824 |
+
"funding": {
|
| 14825 |
+
"url": "https://github.com/sponsors/colinhacks"
|
| 14826 |
+
}
|
| 14827 |
+
},
|
| 14828 |
+
"node_modules/zod-to-json-schema": {
|
| 14829 |
+
"version": "3.24.6",
|
| 14830 |
+
"resolved": "https://registry.npmjs.org/zod-to-json-schema/-/zod-to-json-schema-3.24.6.tgz",
|
| 14831 |
+
"integrity": "sha512-h/z3PKvcTcTetyjl1fkj79MHNEjm+HpD6NXheWjzOekY7kV+lwDYnHw+ivHkijnCSMz1yJaWBD9vu/Fcmk+vEg==",
|
| 14832 |
+
"license": "ISC",
|
| 14833 |
+
"peerDependencies": {
|
| 14834 |
+
"zod": "^3.24.1"
|
| 14835 |
+
}
|
| 14836 |
+
},
|
| 14837 |
"node_modules/zwitch": {
|
| 14838 |
"version": "2.0.4",
|
| 14839 |
"resolved": "https://registry.npmjs.org/zwitch/-/zwitch-2.0.4.tgz",
|
frontend/package.json
CHANGED
|
@@ -10,11 +10,16 @@
|
|
| 10 |
"preview": "vite preview"
|
| 11 |
},
|
| 12 |
"dependencies": {
|
|
|
|
| 13 |
"@llamaindex/chat-ui": "^0.5.17",
|
| 14 |
"@swc/helpers": "^0.5.17",
|
| 15 |
"@tailwindcss/postcss": "^4.1.11",
|
|
|
|
| 16 |
"autoprefixer": "^10.4.21",
|
|
|
|
|
|
|
| 17 |
"katex": "^0.16.22",
|
|
|
|
| 18 |
"postcss": "^8.5.6",
|
| 19 |
"react": "^18.3.1",
|
| 20 |
"react-dom": "^18.3.1",
|
|
@@ -25,6 +30,7 @@
|
|
| 25 |
"rehype-katex": "^7.0.1",
|
| 26 |
"rehype-raw": "^7.0.0",
|
| 27 |
"remark-math": "^6.0.0",
|
|
|
|
| 28 |
"tailwindcss": "^4.1.11"
|
| 29 |
},
|
| 30 |
"devDependencies": {
|
|
@@ -36,6 +42,7 @@
|
|
| 36 |
"eslint-plugin-react-hooks": "^5.2.0",
|
| 37 |
"eslint-plugin-react-refresh": "^0.4.20",
|
| 38 |
"globals": "^16.3.0",
|
|
|
|
| 39 |
"vite": "^7.0.4"
|
| 40 |
}
|
| 41 |
}
|
|
|
|
| 10 |
"preview": "vite preview"
|
| 11 |
},
|
| 12 |
"dependencies": {
|
| 13 |
+
"@ai-sdk/react": "^2.0.11",
|
| 14 |
"@llamaindex/chat-ui": "^0.5.17",
|
| 15 |
"@swc/helpers": "^0.5.17",
|
| 16 |
"@tailwindcss/postcss": "^4.1.11",
|
| 17 |
+
"ai": "^5.0.11",
|
| 18 |
"autoprefixer": "^10.4.21",
|
| 19 |
+
"class-variance-authority": "^0.7.1",
|
| 20 |
+
"clsx": "^2.1.1",
|
| 21 |
"katex": "^0.16.22",
|
| 22 |
+
"lucide-react": "^0.539.0",
|
| 23 |
"postcss": "^8.5.6",
|
| 24 |
"react": "^18.3.1",
|
| 25 |
"react-dom": "^18.3.1",
|
|
|
|
| 30 |
"rehype-katex": "^7.0.1",
|
| 31 |
"rehype-raw": "^7.0.0",
|
| 32 |
"remark-math": "^6.0.0",
|
| 33 |
+
"tailwind-merge": "^3.3.1",
|
| 34 |
"tailwindcss": "^4.1.11"
|
| 35 |
},
|
| 36 |
"devDependencies": {
|
|
|
|
| 42 |
"eslint-plugin-react-hooks": "^5.2.0",
|
| 43 |
"eslint-plugin-react-refresh": "^0.4.20",
|
| 44 |
"globals": "^16.3.0",
|
| 45 |
+
"tw-animate-css": "^1.3.6",
|
| 46 |
"vite": "^7.0.4"
|
| 47 |
}
|
| 48 |
}
|
frontend/src/App.jsx
CHANGED
|
@@ -1,15 +1,15 @@
|
|
| 1 |
import { BrowserRouter as Router, Routes, Route } from 'react-router-dom';
|
| 2 |
import Homepage from './components/Homepage';
|
| 3 |
-
import UploadPage from './components/UploadPage';
|
| 4 |
import DocumentProcessor from './components/DocumentProcessor';
|
|
|
|
| 5 |
|
| 6 |
function App() {
|
| 7 |
return (
|
| 8 |
<Router>
|
| 9 |
<Routes>
|
| 10 |
<Route path="/" element={<Homepage />} />
|
| 11 |
-
<Route path="/upload" element={<UploadPage />} />
|
| 12 |
<Route path="/process" element={<DocumentProcessor />} />
|
|
|
|
| 13 |
</Routes>
|
| 14 |
</Router>
|
| 15 |
);
|
|
|
|
| 1 |
import { BrowserRouter as Router, Routes, Route } from 'react-router-dom';
|
| 2 |
import Homepage from './components/Homepage';
|
|
|
|
| 3 |
import DocumentProcessor from './components/DocumentProcessor';
|
| 4 |
+
import TestComponent from './components/TestComponent';
|
| 5 |
|
| 6 |
function App() {
|
| 7 |
return (
|
| 8 |
<Router>
|
| 9 |
<Routes>
|
| 10 |
<Route path="/" element={<Homepage />} />
|
|
|
|
| 11 |
<Route path="/process" element={<DocumentProcessor />} />
|
| 12 |
+
<Route path="/chat" element={<TestComponent />} />
|
| 13 |
</Routes>
|
| 14 |
</Router>
|
| 15 |
);
|
frontend/src/components/ChunkPanel.jsx
CHANGED
|
@@ -2,10 +2,9 @@ import ReactMarkdown from 'react-markdown';
|
|
| 2 |
import remarkMath from 'remark-math';
|
| 3 |
import rehypeKatex from 'rehype-katex';
|
| 4 |
import rehypeRaw from 'rehype-raw';
|
| 5 |
-
import { ChatSection, ChatMessages, ChatInput } from '@llamaindex/chat-ui';
|
| 6 |
-
import '@llamaindex/chat-ui/styles/markdown.css';
|
| 7 |
import { useState } from 'react';
|
| 8 |
-
import {
|
|
|
|
| 9 |
|
| 10 |
const ChunkPanel = ({
|
| 11 |
documentData,
|
|
@@ -15,88 +14,10 @@ const ChunkPanel = ({
|
|
| 15 |
chunkStates,
|
| 16 |
skipChunk,
|
| 17 |
markChunkUnderstood,
|
| 18 |
-
startInteractiveLesson
|
| 19 |
-
fetchImage,
|
| 20 |
-
imageCache,
|
| 21 |
-
setImageCache
|
| 22 |
}) => {
|
| 23 |
-
const chunkMarkdownComponents = getChunkMarkdownComponents(documentData, fetchImage, imageCache, setImageCache);
|
| 24 |
const chatMarkdownComponents = getChatMarkdownComponents();
|
| 25 |
-
|
| 26 |
-
// Custom chat handler that mimics useChat API
|
| 27 |
-
const [messages, setMessages] = useState([{
|
| 28 |
-
id: 'welcome',
|
| 29 |
-
role: 'assistant',
|
| 30 |
-
content: `I'm here to help you understand this section: **${documentData?.chunks?.[currentChunkIndex]?.topic || 'Loading...'}**\n\nFeel free to ask me any questions about the content!`
|
| 31 |
-
}]);
|
| 32 |
-
const [input, setInput] = useState('');
|
| 33 |
-
const [isLoading, setIsLoading] = useState(false);
|
| 34 |
-
|
| 35 |
-
const handleInputChange = (e) => {
|
| 36 |
-
setInput(e.target.value);
|
| 37 |
-
};
|
| 38 |
-
|
| 39 |
-
const handleSubmit = async (e) => {
|
| 40 |
-
e.preventDefault();
|
| 41 |
-
if (!input.trim() || isLoading) return;
|
| 42 |
-
|
| 43 |
-
const userMessage = {
|
| 44 |
-
id: Date.now().toString(),
|
| 45 |
-
role: 'user',
|
| 46 |
-
content: input
|
| 47 |
-
};
|
| 48 |
-
|
| 49 |
-
setMessages(prev => [...prev, userMessage]);
|
| 50 |
-
setInput('');
|
| 51 |
-
setIsLoading(true);
|
| 52 |
-
|
| 53 |
-
try {
|
| 54 |
-
// For now, using backend proxy to avoid exposing API key in frontend
|
| 55 |
-
const response = await fetch('/api/anthropic-chat', {
|
| 56 |
-
method: 'POST',
|
| 57 |
-
headers: {
|
| 58 |
-
'Content-Type': 'application/json',
|
| 59 |
-
},
|
| 60 |
-
body: JSON.stringify({
|
| 61 |
-
messages: [...messages, userMessage].map(msg => ({
|
| 62 |
-
role: msg.role,
|
| 63 |
-
content: msg.content
|
| 64 |
-
})),
|
| 65 |
-
context: {
|
| 66 |
-
topic: documentData?.chunks?.[currentChunkIndex]?.topic,
|
| 67 |
-
chunkText: documentData?.chunks?.[currentChunkIndex]?.text
|
| 68 |
-
}
|
| 69 |
-
})
|
| 70 |
-
});
|
| 71 |
-
|
| 72 |
-
const data = await response.json();
|
| 73 |
-
|
| 74 |
-
const assistantMessage = {
|
| 75 |
-
id: (Date.now() + 1).toString(),
|
| 76 |
-
role: 'assistant',
|
| 77 |
-
content: data.content || data.message
|
| 78 |
-
};
|
| 79 |
-
|
| 80 |
-
setMessages(prev => [...prev, assistantMessage]);
|
| 81 |
-
} catch (error) {
|
| 82 |
-
console.error('Chat error:', error);
|
| 83 |
-
setMessages(prev => [...prev, {
|
| 84 |
-
id: (Date.now() + 1).toString(),
|
| 85 |
-
role: 'assistant',
|
| 86 |
-
content: 'Sorry, I encountered an error. Please try again.'
|
| 87 |
-
}]);
|
| 88 |
-
} finally {
|
| 89 |
-
setIsLoading(false);
|
| 90 |
-
}
|
| 91 |
-
};
|
| 92 |
-
|
| 93 |
-
const chatHandler = {
|
| 94 |
-
messages,
|
| 95 |
-
input,
|
| 96 |
-
handleInputChange,
|
| 97 |
-
handleSubmit,
|
| 98 |
-
isLoading
|
| 99 |
-
};
|
| 100 |
|
| 101 |
return (
|
| 102 |
<>
|
|
@@ -124,49 +45,82 @@ const ChunkPanel = ({
|
|
| 124 |
</span>
|
| 125 |
</button>
|
| 126 |
|
| 127 |
-
<button
|
| 128 |
-
onClick={markChunkUnderstood}
|
| 129 |
-
className="py-2 px-4 bg-gray-50 hover:bg-gray-100 text-gray-600 rounded-lg transition-all text-sm"
|
| 130 |
-
>
|
| 131 |
-
✓
|
| 132 |
-
</button>
|
| 133 |
</div>
|
| 134 |
|
| 135 |
{/* Expandable Chunk Content */}
|
| 136 |
{chunkExpanded && documentData?.chunks?.[currentChunkIndex] && (
|
| 137 |
-
|
| 138 |
-
<
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
>
|
| 143 |
-
|
| 144 |
-
</
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 145 |
</div>
|
| 146 |
)}
|
| 147 |
</div>
|
| 148 |
|
| 149 |
-
{/* Chat Interface */}
|
| 150 |
-
|
| 151 |
-
<div className="flex-1
|
| 152 |
-
<
|
| 153 |
-
<ChatMessages
|
| 154 |
-
className="p-4"
|
| 155 |
-
showCopy={false}
|
| 156 |
-
/>
|
| 157 |
-
<ChatInput>
|
| 158 |
-
<ChatInput.Form className="bg-white rounded-lg mx-4 mb-4 border border-gray-200 relative">
|
| 159 |
-
<ChatInput.Field
|
| 160 |
-
type="textarea"
|
| 161 |
-
className="resize-none border-0 focus:ring-0 pr-12"
|
| 162 |
-
placeholder="Ask about this section..."
|
| 163 |
-
/>
|
| 164 |
-
<ChatInput.Submit className="absolute right-2 bottom-2 w-8 h-8 rounded-full bg-gray-500 hover:bg-gray-600 text-white border-0 flex items-center justify-center" />
|
| 165 |
-
</ChatInput.Form>
|
| 166 |
-
</ChatInput>
|
| 167 |
-
</ChatSection>
|
| 168 |
</div>
|
| 169 |
-
|
| 170 |
</>
|
| 171 |
);
|
| 172 |
};
|
|
|
|
| 2 |
import remarkMath from 'remark-math';
|
| 3 |
import rehypeKatex from 'rehype-katex';
|
| 4 |
import rehypeRaw from 'rehype-raw';
|
|
|
|
|
|
|
| 5 |
import { useState } from 'react';
|
| 6 |
+
import { getChatMarkdownComponents } from '../utils/markdownComponents.jsx';
|
| 7 |
+
import SimpleChat from './SimpleChat.jsx';
|
| 8 |
|
| 9 |
const ChunkPanel = ({
|
| 10 |
documentData,
|
|
|
|
| 14 |
chunkStates,
|
| 15 |
skipChunk,
|
| 16 |
markChunkUnderstood,
|
| 17 |
+
startInteractiveLesson
|
|
|
|
|
|
|
|
|
|
| 18 |
}) => {
|
|
|
|
| 19 |
const chatMarkdownComponents = getChatMarkdownComponents();
|
| 20 |
+
const [showChat, setShowChat] = useState(false);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
|
| 22 |
return (
|
| 23 |
<>
|
|
|
|
| 45 |
</span>
|
| 46 |
</button>
|
| 47 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
</div>
|
| 49 |
|
| 50 |
{/* Expandable Chunk Content */}
|
| 51 |
{chunkExpanded && documentData?.chunks?.[currentChunkIndex] && (
|
| 52 |
+
<>
|
| 53 |
+
<div className="prose prose-sm max-w-none">
|
| 54 |
+
<ReactMarkdown
|
| 55 |
+
remarkPlugins={[remarkMath]}
|
| 56 |
+
rehypePlugins={[rehypeRaw, rehypeKatex]}
|
| 57 |
+
components={chatMarkdownComponents}
|
| 58 |
+
>
|
| 59 |
+
{documentData.chunks[currentChunkIndex].text}
|
| 60 |
+
</ReactMarkdown>
|
| 61 |
+
</div>
|
| 62 |
+
|
| 63 |
+
{/* Action Buttons */}
|
| 64 |
+
<div className="flex items-center justify-center gap-4 mt-4 pt-4 border-gray-200">
|
| 65 |
+
<button
|
| 66 |
+
onClick={skipChunk}
|
| 67 |
+
className="py-2 px-4 bg-white hover:bg-gray-50 border border-gray-300 rounded-lg transition-all text-sm"
|
| 68 |
+
>
|
| 69 |
+
Skip
|
| 70 |
+
</button>
|
| 71 |
+
<button
|
| 72 |
+
onClick={() => setShowChat(!showChat)}
|
| 73 |
+
className="py-2 px-4 bg-white hover:bg-gray-50 border border-gray-300 rounded-lg transition-all text-sm flex items-center gap-1"
|
| 74 |
+
>
|
| 75 |
+
<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="1.5">
|
| 76 |
+
<path d="M21 15a2 2 0 0 1-2 2H7l-4 4V5a2 2 0 0 1 2-2h14a2 2 0 0 1 2 2z"/>
|
| 77 |
+
</svg>
|
| 78 |
+
Chat
|
| 79 |
+
</button>
|
| 80 |
+
<button
|
| 81 |
+
onClick={markChunkUnderstood}
|
| 82 |
+
className="py-2 px-4 bg-white hover:bg-gray-50 border border-gray-300 rounded-lg transition-all text-sm"
|
| 83 |
+
>
|
| 84 |
+
Understood
|
| 85 |
+
</button>
|
| 86 |
+
</div>
|
| 87 |
+
</>
|
| 88 |
+
)}
|
| 89 |
+
|
| 90 |
+
{/* Show buttons even when chunk is collapsed */}
|
| 91 |
+
{!chunkExpanded && (
|
| 92 |
+
<div className="flex items-center justify-center gap-4 mt-4 pt-4 border-t border-gray-200">
|
| 93 |
+
<button
|
| 94 |
+
onClick={skipChunk}
|
| 95 |
+
className="py-2 px-4 bg-white hover:bg-gray-50 border border-gray-300 rounded-lg transition-all text-sm"
|
| 96 |
>
|
| 97 |
+
Skip
|
| 98 |
+
</button>
|
| 99 |
+
<button
|
| 100 |
+
onClick={() => setShowChat(!showChat)}
|
| 101 |
+
className="py-2 px-4 bg-white hover:bg-gray-50 border border-gray-300 rounded-lg transition-all text-sm flex items-center gap-1"
|
| 102 |
+
>
|
| 103 |
+
<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="1.5">
|
| 104 |
+
<path d="M21 15a2 2 0 0 1-2 2H7l-4 4V5a2 2 0 0 1 2-2h14a2 2 0 0 1 2 2z"/>
|
| 105 |
+
</svg>
|
| 106 |
+
Chat
|
| 107 |
+
</button>
|
| 108 |
+
<button
|
| 109 |
+
onClick={markChunkUnderstood}
|
| 110 |
+
className="py-2 px-4 bg-white hover:bg-gray-50 border border-gray-300 rounded-lg transition-all text-sm"
|
| 111 |
+
>
|
| 112 |
+
Understood
|
| 113 |
+
</button>
|
| 114 |
</div>
|
| 115 |
)}
|
| 116 |
</div>
|
| 117 |
|
| 118 |
+
{/* Chat Interface - Only shown when showChat is true */}
|
| 119 |
+
{showChat && (
|
| 120 |
+
<div className="flex-1 flex flex-col min-h-0 bg-white rounded-lg m-2 shadow-lg">
|
| 121 |
+
<SimpleChat />
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 122 |
</div>
|
| 123 |
+
)}
|
| 124 |
</>
|
| 125 |
);
|
| 126 |
};
|
frontend/src/components/DocumentProcessor.jsx
CHANGED
|
@@ -1,9 +1,7 @@
|
|
| 1 |
-
// Removed useMemo import - no longer needed
|
| 2 |
import 'katex/dist/katex.min.css';
|
| 3 |
|
| 4 |
// Import custom hooks
|
| 5 |
import { useDocumentProcessor } from '../hooks/useDocumentProcessor';
|
| 6 |
-
// Removed useChat import - handled in ChunkPanel now
|
| 7 |
import { useChunkNavigation } from '../hooks/useChunkNavigation';
|
| 8 |
import { usePanelResize } from '../hooks/usePanelResize';
|
| 9 |
|
|
@@ -13,8 +11,6 @@ import DocumentViewer from './DocumentViewer';
|
|
| 13 |
import ChunkNavigation from './ChunkNavigation';
|
| 14 |
import ChunkPanel from './ChunkPanel';
|
| 15 |
|
| 16 |
-
// Removed markdown utilities - using PDF viewer now
|
| 17 |
-
|
| 18 |
function DocumentProcessor() {
|
| 19 |
// Custom hooks
|
| 20 |
const {
|
|
@@ -22,17 +18,12 @@ function DocumentProcessor() {
|
|
| 22 |
selectedFile,
|
| 23 |
processing,
|
| 24 |
uploadProgress,
|
| 25 |
-
ocrProgress,
|
| 26 |
documentData,
|
| 27 |
-
imageCache,
|
| 28 |
handleFileChange,
|
| 29 |
-
fetchImage,
|
| 30 |
processDocument,
|
| 31 |
setSelectedFile
|
| 32 |
} = useDocumentProcessor();
|
| 33 |
|
| 34 |
-
// Removed useChat hook - now handled in ChunkPanel
|
| 35 |
-
|
| 36 |
const {
|
| 37 |
chunkStates,
|
| 38 |
currentChunkIndex,
|
|
@@ -43,7 +34,7 @@ function DocumentProcessor() {
|
|
| 43 |
markChunkUnderstood,
|
| 44 |
startInteractiveLesson,
|
| 45 |
setChunkExpanded
|
| 46 |
-
} = useChunkNavigation(documentData, null);
|
| 47 |
|
| 48 |
const {
|
| 49 |
leftPanelWidth,
|
|
@@ -51,15 +42,12 @@ function DocumentProcessor() {
|
|
| 51 |
containerRef,
|
| 52 |
handleMouseDown
|
| 53 |
} = usePanelResize(50);
|
| 54 |
-
} = usePanelResize(50);
|
| 55 |
|
| 56 |
-
// Simplified startInteractiveLesson
|
| 57 |
const handleStartInteractiveLesson = () => {
|
| 58 |
startInteractiveLesson();
|
| 59 |
};
|
| 60 |
|
| 61 |
-
// No longer need highlighted markdown - using PDF viewer instead
|
| 62 |
-
|
| 63 |
// Early returns for different states
|
| 64 |
if (!selectedFile) {
|
| 65 |
return (
|
|
@@ -82,7 +70,7 @@ function DocumentProcessor() {
|
|
| 82 |
}
|
| 83 |
|
| 84 |
if (processing) {
|
| 85 |
-
return <LoadingAnimation uploadProgress={uploadProgress}
|
| 86 |
}
|
| 87 |
|
| 88 |
if (!documentData) {
|
|
@@ -116,7 +104,6 @@ function DocumentProcessor() {
|
|
| 116 |
{/* Left Panel - Document */}
|
| 117 |
<div style={{ width: `${leftPanelWidth}%`, height: '100%' }}>
|
| 118 |
<DocumentViewer
|
| 119 |
-
selectedFile={selectedFile}
|
| 120 |
selectedFile={selectedFile}
|
| 121 |
documentData={documentData}
|
| 122 |
/>
|
|
@@ -162,9 +149,6 @@ function DocumentProcessor() {
|
|
| 162 |
skipChunk={skipChunk}
|
| 163 |
markChunkUnderstood={markChunkUnderstood}
|
| 164 |
startInteractiveLesson={handleStartInteractiveLesson}
|
| 165 |
-
fetchImage={fetchImage}
|
| 166 |
-
imageCache={imageCache}
|
| 167 |
-
setImageCache={() => {}} // Handled by useDocumentProcessor
|
| 168 |
/>
|
| 169 |
</div>
|
| 170 |
</div>
|
|
|
|
|
|
|
| 1 |
import 'katex/dist/katex.min.css';
|
| 2 |
|
| 3 |
// Import custom hooks
|
| 4 |
import { useDocumentProcessor } from '../hooks/useDocumentProcessor';
|
|
|
|
| 5 |
import { useChunkNavigation } from '../hooks/useChunkNavigation';
|
| 6 |
import { usePanelResize } from '../hooks/usePanelResize';
|
| 7 |
|
|
|
|
| 11 |
import ChunkNavigation from './ChunkNavigation';
|
| 12 |
import ChunkPanel from './ChunkPanel';
|
| 13 |
|
|
|
|
|
|
|
| 14 |
function DocumentProcessor() {
|
| 15 |
// Custom hooks
|
| 16 |
const {
|
|
|
|
| 18 |
selectedFile,
|
| 19 |
processing,
|
| 20 |
uploadProgress,
|
|
|
|
| 21 |
documentData,
|
|
|
|
| 22 |
handleFileChange,
|
|
|
|
| 23 |
processDocument,
|
| 24 |
setSelectedFile
|
| 25 |
} = useDocumentProcessor();
|
| 26 |
|
|
|
|
|
|
|
| 27 |
const {
|
| 28 |
chunkStates,
|
| 29 |
currentChunkIndex,
|
|
|
|
| 34 |
markChunkUnderstood,
|
| 35 |
startInteractiveLesson,
|
| 36 |
setChunkExpanded
|
| 37 |
+
} = useChunkNavigation(documentData, null);
|
| 38 |
|
| 39 |
const {
|
| 40 |
leftPanelWidth,
|
|
|
|
| 42 |
containerRef,
|
| 43 |
handleMouseDown
|
| 44 |
} = usePanelResize(50);
|
|
|
|
| 45 |
|
| 46 |
+
// Simplified startInteractiveLesson
|
| 47 |
const handleStartInteractiveLesson = () => {
|
| 48 |
startInteractiveLesson();
|
| 49 |
};
|
| 50 |
|
|
|
|
|
|
|
| 51 |
// Early returns for different states
|
| 52 |
if (!selectedFile) {
|
| 53 |
return (
|
|
|
|
| 70 |
}
|
| 71 |
|
| 72 |
if (processing) {
|
| 73 |
+
return <LoadingAnimation uploadProgress={uploadProgress} />;
|
| 74 |
}
|
| 75 |
|
| 76 |
if (!documentData) {
|
|
|
|
| 104 |
{/* Left Panel - Document */}
|
| 105 |
<div style={{ width: `${leftPanelWidth}%`, height: '100%' }}>
|
| 106 |
<DocumentViewer
|
|
|
|
| 107 |
selectedFile={selectedFile}
|
| 108 |
documentData={documentData}
|
| 109 |
/>
|
|
|
|
| 149 |
skipChunk={skipChunk}
|
| 150 |
markChunkUnderstood={markChunkUnderstood}
|
| 151 |
startInteractiveLesson={handleStartInteractiveLesson}
|
|
|
|
|
|
|
|
|
|
| 152 |
/>
|
| 153 |
</div>
|
| 154 |
</div>
|
frontend/src/components/DocumentProcessor.jsx.backup
DELETED
|
@@ -1,889 +0,0 @@
|
|
| 1 |
-
import { useMemo } from 'react';
|
| 2 |
-
import 'katex/dist/katex.min.css';
|
| 3 |
-
|
| 4 |
-
// Import custom hooks
|
| 5 |
-
import { useDocumentProcessor } from '../hooks/useDocumentProcessor';
|
| 6 |
-
import { useChat } from '../hooks/useChat';
|
| 7 |
-
import { useChunkNavigation } from '../hooks/useChunkNavigation';
|
| 8 |
-
import { usePanelResize } from '../hooks/usePanelResize';
|
| 9 |
-
|
| 10 |
-
// Import components
|
| 11 |
-
import LoadingAnimation from './LoadingAnimation';
|
| 12 |
-
import DocumentViewer from './DocumentViewer';
|
| 13 |
-
import ChunkNavigation from './ChunkNavigation';
|
| 14 |
-
import ChunkPanel from './ChunkPanel';
|
| 15 |
-
|
| 16 |
-
// Import utilities
|
| 17 |
-
import { highlightChunkInMarkdown } from '../utils/markdownUtils';
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
function DocumentProcessor() {
|
| 21 |
-
// Custom hooks
|
| 22 |
-
const {
|
| 23 |
-
fileInputRef,
|
| 24 |
-
selectedFile,
|
| 25 |
-
processing,
|
| 26 |
-
uploadProgress,
|
| 27 |
-
ocrProgress,
|
| 28 |
-
documentData,
|
| 29 |
-
imageCache,
|
| 30 |
-
handleFileChange,
|
| 31 |
-
fetchImage,
|
| 32 |
-
processDocument,
|
| 33 |
-
setSelectedFile
|
| 34 |
-
} = useDocumentProcessor();
|
| 35 |
-
|
| 36 |
-
const {
|
| 37 |
-
chatLoading,
|
| 38 |
-
chatMessages,
|
| 39 |
-
userInput,
|
| 40 |
-
typingMessage,
|
| 41 |
-
startChunkLesson,
|
| 42 |
-
clearTypingAnimation,
|
| 43 |
-
setUserInput
|
| 44 |
-
} = useChat();
|
| 45 |
-
|
| 46 |
-
const {
|
| 47 |
-
chunkStates,
|
| 48 |
-
currentChunkIndex,
|
| 49 |
-
chunkExpanded,
|
| 50 |
-
goToNextChunk,
|
| 51 |
-
goToPrevChunk,
|
| 52 |
-
skipChunk,
|
| 53 |
-
markChunkUnderstood,
|
| 54 |
-
startInteractiveLesson,
|
| 55 |
-
setChunkExpanded
|
| 56 |
-
} = useChunkNavigation(documentData, clearTypingAnimation);
|
| 57 |
-
|
| 58 |
-
const {
|
| 59 |
-
leftPanelWidth,
|
| 60 |
-
isDragging,
|
| 61 |
-
containerRef,
|
| 62 |
-
handleMouseDown
|
| 63 |
-
} = usePanelResize(40);
|
| 64 |
-
|
| 65 |
-
// Enhanced startInteractiveLesson that uses the chat hook
|
| 66 |
-
const handleStartInteractiveLesson = () => {
|
| 67 |
-
startInteractiveLesson(() => startChunkLesson(currentChunkIndex, documentData));
|
| 68 |
-
};
|
| 69 |
-
|
| 70 |
-
// Memoize the highlighted markdown to prevent unnecessary re-renders
|
| 71 |
-
const highlightedMarkdown = useMemo(() => {
|
| 72 |
-
if (!documentData || !documentData.markdown || !documentData.chunks) {
|
| 73 |
-
return '';
|
| 74 |
-
}
|
| 75 |
-
return highlightChunkInMarkdown(documentData.markdown, documentData.chunks, currentChunkIndex);
|
| 76 |
-
}, [documentData?.markdown, documentData?.chunks, currentChunkIndex]);
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
// Handle panel resizing
|
| 80 |
-
const handleMouseDown = (e) => {
|
| 81 |
-
setIsDragging(true);
|
| 82 |
-
e.preventDefault();
|
| 83 |
-
};
|
| 84 |
-
|
| 85 |
-
const handleMouseMove = (e) => {
|
| 86 |
-
if (!isDragging || !containerRef.current) return;
|
| 87 |
-
|
| 88 |
-
const containerRect = containerRef.current.getBoundingClientRect();
|
| 89 |
-
const newLeftWidth = ((e.clientX - containerRect.left) / containerRect.width) * 100;
|
| 90 |
-
|
| 91 |
-
// Constrain between 20% and 80%
|
| 92 |
-
if (newLeftWidth >= 20 && newLeftWidth <= 80) {
|
| 93 |
-
setLeftPanelWidth(newLeftWidth);
|
| 94 |
-
}
|
| 95 |
-
};
|
| 96 |
-
|
| 97 |
-
const handleMouseUp = () => {
|
| 98 |
-
setIsDragging(false);
|
| 99 |
-
};
|
| 100 |
-
|
| 101 |
-
useEffect(() => {
|
| 102 |
-
if (isDragging) {
|
| 103 |
-
document.addEventListener('mousemove', handleMouseMove);
|
| 104 |
-
document.addEventListener('mouseup', handleMouseUp);
|
| 105 |
-
return () => {
|
| 106 |
-
document.removeEventListener('mousemove', handleMouseMove);
|
| 107 |
-
document.removeEventListener('mouseup', handleMouseUp);
|
| 108 |
-
};
|
| 109 |
-
}
|
| 110 |
-
}, [isDragging]);
|
| 111 |
-
|
| 112 |
-
// Function to simulate typing animation
|
| 113 |
-
const typeMessage = (text, callback) => {
|
| 114 |
-
// Clear any existing typing animation
|
| 115 |
-
if (typingInterval) {
|
| 116 |
-
clearInterval(typingInterval);
|
| 117 |
-
}
|
| 118 |
-
|
| 119 |
-
setTypingMessage('');
|
| 120 |
-
let currentIndex = 0;
|
| 121 |
-
const typeSpeed = Math.max(1, Math.min(3, 200 / text.length)); // Much faster: max 800ms total
|
| 122 |
-
|
| 123 |
-
const interval = setInterval(() => {
|
| 124 |
-
if (currentIndex < text.length) {
|
| 125 |
-
setTypingMessage(text.slice(0, currentIndex + 1));
|
| 126 |
-
currentIndex++;
|
| 127 |
-
} else {
|
| 128 |
-
clearInterval(interval);
|
| 129 |
-
setTypingInterval(null);
|
| 130 |
-
setTypingMessage('');
|
| 131 |
-
callback();
|
| 132 |
-
}
|
| 133 |
-
}, typeSpeed);
|
| 134 |
-
|
| 135 |
-
setTypingInterval(interval);
|
| 136 |
-
};
|
| 137 |
-
|
| 138 |
-
// Function to start a chunk lesson
|
| 139 |
-
const startChunkLesson = async (chunkIndex) => {
|
| 140 |
-
if (!documentData || !documentData.chunks[chunkIndex]) return;
|
| 141 |
-
|
| 142 |
-
setChatLoading(true);
|
| 143 |
-
|
| 144 |
-
try {
|
| 145 |
-
const chunk = documentData.chunks[chunkIndex];
|
| 146 |
-
console.log('Starting lesson for chunk:', chunkIndex, chunk);
|
| 147 |
-
console.log('Document data:', documentData.fileId, documentData.markdown?.length);
|
| 148 |
-
|
| 149 |
-
const response = await fetch(`/start_chunk_lesson/${documentData.fileId}/${chunkIndex}`, {
|
| 150 |
-
method: 'POST',
|
| 151 |
-
headers: {
|
| 152 |
-
'Content-Type': 'application/json',
|
| 153 |
-
},
|
| 154 |
-
body: JSON.stringify({
|
| 155 |
-
chunk: chunk,
|
| 156 |
-
document_markdown: documentData.markdown
|
| 157 |
-
})
|
| 158 |
-
});
|
| 159 |
-
|
| 160 |
-
if (!response.ok) {
|
| 161 |
-
const errorData = await response.text();
|
| 162 |
-
console.error('Backend error:', errorData);
|
| 163 |
-
throw new Error(`Failed to start lesson: ${response.status} - ${errorData}`);
|
| 164 |
-
}
|
| 165 |
-
|
| 166 |
-
const lessonData = await response.json();
|
| 167 |
-
setChatData(prev => ({
|
| 168 |
-
...prev,
|
| 169 |
-
[chunkIndex]: {
|
| 170 |
-
...lessonData,
|
| 171 |
-
chunkIndex: chunkIndex,
|
| 172 |
-
chunk: chunk
|
| 173 |
-
}
|
| 174 |
-
}));
|
| 175 |
-
|
| 176 |
-
setChatLoading(false);
|
| 177 |
-
|
| 178 |
-
// Type out the message with animation
|
| 179 |
-
typeMessage(lessonData.questions, () => {
|
| 180 |
-
setChatMessages(prev => ({
|
| 181 |
-
...prev,
|
| 182 |
-
[chunkIndex]: [
|
| 183 |
-
{ type: 'ai', text: lessonData.questions }
|
| 184 |
-
]
|
| 185 |
-
}));
|
| 186 |
-
});
|
| 187 |
-
|
| 188 |
-
} catch (error) {
|
| 189 |
-
console.error('Error starting lesson:', error);
|
| 190 |
-
alert('Error starting lesson: ' + error.message);
|
| 191 |
-
setChatLoading(false);
|
| 192 |
-
}
|
| 193 |
-
};
|
| 194 |
-
|
| 195 |
-
// Navigation functions
|
| 196 |
-
const goToNextChunk = () => {
|
| 197 |
-
if (documentData && currentChunkIndex < documentData.chunks.length - 1) {
|
| 198 |
-
// Clear any ongoing typing animation
|
| 199 |
-
if (typingInterval) {
|
| 200 |
-
clearInterval(typingInterval);
|
| 201 |
-
setTypingInterval(null);
|
| 202 |
-
}
|
| 203 |
-
setTypingMessage('');
|
| 204 |
-
setCurrentChunkIndex(currentChunkIndex + 1);
|
| 205 |
-
}
|
| 206 |
-
};
|
| 207 |
-
|
| 208 |
-
const goToPrevChunk = () => {
|
| 209 |
-
if (currentChunkIndex > 0) {
|
| 210 |
-
// Clear any ongoing typing animation
|
| 211 |
-
if (typingInterval) {
|
| 212 |
-
clearInterval(typingInterval);
|
| 213 |
-
setTypingInterval(null);
|
| 214 |
-
}
|
| 215 |
-
setTypingMessage('');
|
| 216 |
-
setCurrentChunkIndex(currentChunkIndex - 1);
|
| 217 |
-
}
|
| 218 |
-
};
|
| 219 |
-
|
| 220 |
-
// Chunk action functions
|
| 221 |
-
const skipChunk = () => {
|
| 222 |
-
setChunkStates(prev => ({
|
| 223 |
-
...prev,
|
| 224 |
-
[currentChunkIndex]: 'skipped'
|
| 225 |
-
}));
|
| 226 |
-
};
|
| 227 |
-
|
| 228 |
-
const markChunkUnderstood = () => {
|
| 229 |
-
setChunkStates(prev => ({
|
| 230 |
-
...prev,
|
| 231 |
-
[currentChunkIndex]: 'understood'
|
| 232 |
-
}));
|
| 233 |
-
};
|
| 234 |
-
|
| 235 |
-
const startInteractiveLesson = () => {
|
| 236 |
-
setChunkStates(prev => ({
|
| 237 |
-
...prev,
|
| 238 |
-
[currentChunkIndex]: 'interactive'
|
| 239 |
-
}));
|
| 240 |
-
startChunkLesson(currentChunkIndex);
|
| 241 |
-
};
|
| 242 |
-
|
| 243 |
-
const fetchImage = useCallback(async (imageId, fileId) => {
|
| 244 |
-
// Check if image is already cached using ref
|
| 245 |
-
if (imageCacheRef.current[imageId]) {
|
| 246 |
-
return imageCacheRef.current[imageId];
|
| 247 |
-
}
|
| 248 |
-
|
| 249 |
-
try {
|
| 250 |
-
const response = await fetch(`/get_image/${fileId}/${imageId}`);
|
| 251 |
-
if (response.ok) {
|
| 252 |
-
const data = await response.json();
|
| 253 |
-
const imageData = data.image_base64;
|
| 254 |
-
|
| 255 |
-
// Cache the image in ref
|
| 256 |
-
imageCacheRef.current = {
|
| 257 |
-
...imageCacheRef.current,
|
| 258 |
-
[imageId]: imageData
|
| 259 |
-
};
|
| 260 |
-
|
| 261 |
-
// Also update state for other components that might need it
|
| 262 |
-
setImageCache(prev => ({
|
| 263 |
-
...prev,
|
| 264 |
-
[imageId]: imageData
|
| 265 |
-
}));
|
| 266 |
-
|
| 267 |
-
return imageData;
|
| 268 |
-
}
|
| 269 |
-
} catch (error) {
|
| 270 |
-
console.error('Error fetching image:', error);
|
| 271 |
-
}
|
| 272 |
-
return null;
|
| 273 |
-
}, []); // No dependencies - stable function
|
| 274 |
-
|
| 275 |
-
const ImageComponent = memo(({ src, alt }) => {
|
| 276 |
-
const [imageSrc, setImageSrc] = useState(null);
|
| 277 |
-
const [loading, setLoading] = useState(true);
|
| 278 |
-
|
| 279 |
-
useEffect(() => {
|
| 280 |
-
if (documentData && src) {
|
| 281 |
-
fetchImage(src, documentData.fileId).then(imageData => {
|
| 282 |
-
if (imageData) {
|
| 283 |
-
setImageSrc(imageData);
|
| 284 |
-
}
|
| 285 |
-
setLoading(false);
|
| 286 |
-
});
|
| 287 |
-
}
|
| 288 |
-
}, [src, documentData?.fileId, fetchImage]);
|
| 289 |
-
|
| 290 |
-
if (loading) {
|
| 291 |
-
return (
|
| 292 |
-
<span style={{
|
| 293 |
-
display: 'inline-block',
|
| 294 |
-
width: '100%',
|
| 295 |
-
height: '200px',
|
| 296 |
-
backgroundColor: '#f3f4f6',
|
| 297 |
-
textAlign: 'center',
|
| 298 |
-
lineHeight: '200px',
|
| 299 |
-
margin: '1rem 0',
|
| 300 |
-
borderRadius: '0.5rem',
|
| 301 |
-
color: '#6b7280'
|
| 302 |
-
}}>
|
| 303 |
-
Loading image...
|
| 304 |
-
</span>
|
| 305 |
-
);
|
| 306 |
-
}
|
| 307 |
-
|
| 308 |
-
if (!imageSrc) {
|
| 309 |
-
return (
|
| 310 |
-
<span style={{
|
| 311 |
-
display: 'inline-block',
|
| 312 |
-
width: '100%',
|
| 313 |
-
height: '200px',
|
| 314 |
-
backgroundColor: '#fef2f2',
|
| 315 |
-
textAlign: 'center',
|
| 316 |
-
lineHeight: '200px',
|
| 317 |
-
margin: '1rem 0',
|
| 318 |
-
borderRadius: '0.5rem',
|
| 319 |
-
border: '1px solid #fecaca',
|
| 320 |
-
color: '#dc2626'
|
| 321 |
-
}}>
|
| 322 |
-
Image not found: {alt || src}
|
| 323 |
-
</span>
|
| 324 |
-
);
|
| 325 |
-
}
|
| 326 |
-
|
| 327 |
-
return (
|
| 328 |
-
<img
|
| 329 |
-
src={imageSrc}
|
| 330 |
-
alt={alt || 'Document image'}
|
| 331 |
-
style={{
|
| 332 |
-
display: 'block',
|
| 333 |
-
maxWidth: '100%',
|
| 334 |
-
height: 'auto',
|
| 335 |
-
margin: '1.5rem auto'
|
| 336 |
-
}}
|
| 337 |
-
/>
|
| 338 |
-
);
|
| 339 |
-
});
|
| 340 |
-
|
| 341 |
-
|
| 342 |
-
|
| 343 |
-
const processDocument = async () => {
|
| 344 |
-
if (!selectedFile) return;
|
| 345 |
-
|
| 346 |
-
setProcessing(true);
|
| 347 |
-
setUploadProgress(0);
|
| 348 |
-
setOcrProgress(0);
|
| 349 |
-
|
| 350 |
-
try {
|
| 351 |
-
// Step 1: Upload PDF
|
| 352 |
-
const formData = new FormData();
|
| 353 |
-
formData.append('file', selectedFile);
|
| 354 |
-
|
| 355 |
-
setUploadProgress(30);
|
| 356 |
-
const uploadResponse = await fetch('/upload_pdf', {
|
| 357 |
-
method: 'POST',
|
| 358 |
-
body: formData,
|
| 359 |
-
});
|
| 360 |
-
|
| 361 |
-
if (!uploadResponse.ok) {
|
| 362 |
-
throw new Error('Failed to upload PDF');
|
| 363 |
-
}
|
| 364 |
-
|
| 365 |
-
const uploadData = await uploadResponse.json();
|
| 366 |
-
setUploadProgress(100);
|
| 367 |
-
|
| 368 |
-
// Step 2: Process OCR
|
| 369 |
-
setOcrProgress(20);
|
| 370 |
-
await new Promise(resolve => setTimeout(resolve, 500)); // Small delay for UX
|
| 371 |
-
|
| 372 |
-
setOcrProgress(60);
|
| 373 |
-
const ocrResponse = await fetch(`/process_ocr/${uploadData.file_id}`);
|
| 374 |
-
|
| 375 |
-
if (!ocrResponse.ok) {
|
| 376 |
-
throw new Error('Failed to process OCR');
|
| 377 |
-
}
|
| 378 |
-
|
| 379 |
-
const ocrData = await ocrResponse.json();
|
| 380 |
-
setOcrProgress(100);
|
| 381 |
-
|
| 382 |
-
// Combine all markdown from pages
|
| 383 |
-
const combinedMarkdown = ocrData.pages
|
| 384 |
-
.map(page => page.markdown)
|
| 385 |
-
.join('\n\n---\n\n');
|
| 386 |
-
|
| 387 |
-
// Collect all chunks from all pages
|
| 388 |
-
const allChunks = [];
|
| 389 |
-
let markdownOffset = 0;
|
| 390 |
-
|
| 391 |
-
ocrData.pages.forEach((page, pageIndex) => {
|
| 392 |
-
if (page.chunks && page.chunks.length > 0) {
|
| 393 |
-
page.chunks.forEach(chunk => {
|
| 394 |
-
allChunks.push({
|
| 395 |
-
...chunk,
|
| 396 |
-
start_position: chunk.start_position + markdownOffset,
|
| 397 |
-
end_position: chunk.end_position + markdownOffset,
|
| 398 |
-
pageIndex: pageIndex
|
| 399 |
-
});
|
| 400 |
-
});
|
| 401 |
-
}
|
| 402 |
-
markdownOffset += page.markdown.length + 6; // +6 for the separator "\n\n---\n\n"
|
| 403 |
-
});
|
| 404 |
-
|
| 405 |
-
setDocumentData({
|
| 406 |
-
fileId: uploadData.file_id,
|
| 407 |
-
filename: uploadData.filename,
|
| 408 |
-
markdown: combinedMarkdown,
|
| 409 |
-
pages: ocrData.pages,
|
| 410 |
-
totalPages: ocrData.total_pages,
|
| 411 |
-
chunks: allChunks
|
| 412 |
-
});
|
| 413 |
-
|
| 414 |
-
} catch (error) {
|
| 415 |
-
console.error('Error processing document:', error);
|
| 416 |
-
alert('Error processing document: ' + error.message);
|
| 417 |
-
} finally {
|
| 418 |
-
setProcessing(false);
|
| 419 |
-
}
|
| 420 |
-
};
|
| 421 |
-
|
| 422 |
-
const LoadingAnimation = () => (
|
| 423 |
-
<div className="flex flex-col items-center justify-center min-h-screen bg-gray-50">
|
| 424 |
-
<div className="text-center max-w-md">
|
| 425 |
-
<div className="mb-8">
|
| 426 |
-
<div className="w-16 h-16 border-4 border-blue-500 border-t-transparent rounded-full animate-spin mx-auto mb-4"></div>
|
| 427 |
-
<h2 className="text-2xl font-bold text-gray-900 mb-2">Processing Your Document</h2>
|
| 428 |
-
<p className="text-gray-600">This may take a moment...</p>
|
| 429 |
-
</div>
|
| 430 |
-
|
| 431 |
-
{/* Upload Progress */}
|
| 432 |
-
<div className="mb-6">
|
| 433 |
-
<div className="flex justify-between text-sm text-gray-600 mb-1">
|
| 434 |
-
<span>Uploading PDF</span>
|
| 435 |
-
<span>{uploadProgress}%</span>
|
| 436 |
-
</div>
|
| 437 |
-
<div className="w-full bg-gray-200 rounded-full h-2">
|
| 438 |
-
<div
|
| 439 |
-
className="bg-blue-500 h-2 rounded-full transition-all duration-300"
|
| 440 |
-
style={{ width: `${uploadProgress}%` }}
|
| 441 |
-
></div>
|
| 442 |
-
</div>
|
| 443 |
-
</div>
|
| 444 |
-
|
| 445 |
-
{/* OCR Progress */}
|
| 446 |
-
<div className="mb-6">
|
| 447 |
-
<div className="flex justify-between text-sm text-gray-600 mb-1">
|
| 448 |
-
<span>Processing with AI</span>
|
| 449 |
-
<span>{ocrProgress}%</span>
|
| 450 |
-
</div>
|
| 451 |
-
<div className="w-full bg-gray-200 rounded-full h-2">
|
| 452 |
-
<div
|
| 453 |
-
className="bg-green-500 h-2 rounded-full transition-all duration-300"
|
| 454 |
-
style={{ width: `${ocrProgress}%` }}
|
| 455 |
-
></div>
|
| 456 |
-
</div>
|
| 457 |
-
</div>
|
| 458 |
-
|
| 459 |
-
<p className="text-sm text-gray-500">
|
| 460 |
-
Using AI to extract text and understand your document structure...
|
| 461 |
-
</p>
|
| 462 |
-
</div>
|
| 463 |
-
</div>
|
| 464 |
-
);
|
| 465 |
-
|
| 466 |
-
|
| 467 |
-
if (!selectedFile) {
|
| 468 |
-
return (
|
| 469 |
-
<div className="h-screen bg-gray-50 flex items-center justify-center">
|
| 470 |
-
<input
|
| 471 |
-
ref={fileInputRef}
|
| 472 |
-
type="file"
|
| 473 |
-
accept=".pdf"
|
| 474 |
-
className="hidden"
|
| 475 |
-
onChange={handleFileChange}
|
| 476 |
-
/>
|
| 477 |
-
<button
|
| 478 |
-
onClick={() => fileInputRef.current.click()}
|
| 479 |
-
className="px-6 py-3 bg-white shadow-md hover:shadow-lg text-gray-700 font-medium rounded-lg transition-all"
|
| 480 |
-
>
|
| 481 |
-
Select PDF
|
| 482 |
-
</button>
|
| 483 |
-
</div>
|
| 484 |
-
);
|
| 485 |
-
}
|
| 486 |
-
|
| 487 |
-
if (processing) {
|
| 488 |
-
return <LoadingAnimation />;
|
| 489 |
-
}
|
| 490 |
-
|
| 491 |
-
if (!documentData) {
|
| 492 |
-
return (
|
| 493 |
-
<div className="h-screen bg-gray-50 flex items-center justify-center">
|
| 494 |
-
<div className="flex gap-4">
|
| 495 |
-
<button
|
| 496 |
-
onClick={processDocument}
|
| 497 |
-
className="px-6 py-3 bg-white shadow-md hover:shadow-lg text-gray-700 font-medium rounded-lg transition-all"
|
| 498 |
-
>
|
| 499 |
-
Process
|
| 500 |
-
</button>
|
| 501 |
-
<button
|
| 502 |
-
onClick={() => setSelectedFile(null)}
|
| 503 |
-
className="px-6 py-3 bg-white shadow-md hover:shadow-lg text-gray-700 font-medium rounded-lg transition-all"
|
| 504 |
-
>
|
| 505 |
-
← Back
|
| 506 |
-
</button>
|
| 507 |
-
</div>
|
| 508 |
-
</div>
|
| 509 |
-
);
|
| 510 |
-
}
|
| 511 |
-
|
| 512 |
-
return (
|
| 513 |
-
<div
|
| 514 |
-
ref={containerRef}
|
| 515 |
-
className="h-screen bg-gray-100 flex gap-2 p-6 overflow-hidden"
|
| 516 |
-
style={{ cursor: isDragging ? 'col-resize' : 'default' }}
|
| 517 |
-
>
|
| 518 |
-
{/* Left Panel - Document */}
|
| 519 |
-
<div
|
| 520 |
-
className="bg-white rounded-lg shadow-sm flex flex-col"
|
| 521 |
-
style={{ width: `${leftPanelWidth}%` }}
|
| 522 |
-
>
|
| 523 |
-
{/* Header */}
|
| 524 |
-
<div className="sticky top-0 bg-white rounded-t-lg px-6 py-4 border-b border-gray-200 z-10">
|
| 525 |
-
<h2 className="text-lg font-semibold text-left text-gray-800">Document</h2>
|
| 526 |
-
</div>
|
| 527 |
-
|
| 528 |
-
{/* Content */}
|
| 529 |
-
<div className="flex-1 px-6 pt-6 pb-8 overflow-y-auto">
|
| 530 |
-
<style>
|
| 531 |
-
{`
|
| 532 |
-
@keyframes fadeInHighlight {
|
| 533 |
-
0% {
|
| 534 |
-
background-color: rgba(255, 214, 100, 0);
|
| 535 |
-
border-left-color: rgba(156, 163, 175, 0);
|
| 536 |
-
transform: translateX(-10px);
|
| 537 |
-
opacity: 0;
|
| 538 |
-
}
|
| 539 |
-
100% {
|
| 540 |
-
background-color: rgba(255, 214, 100, 0.15);
|
| 541 |
-
border-left-color: rgba(156, 163, 175, 0.5);
|
| 542 |
-
transform: translateX(0);
|
| 543 |
-
opacity: 1;
|
| 544 |
-
}
|
| 545 |
-
}
|
| 546 |
-
`}
|
| 547 |
-
</style>
|
| 548 |
-
<div className="prose prose-sm max-w-none" style={{
|
| 549 |
-
fontSize: '0.875rem',
|
| 550 |
-
lineHeight: '1.5',
|
| 551 |
-
color: 'rgb(55, 65, 81)'
|
| 552 |
-
}}>
|
| 553 |
-
<ReactMarkdown
|
| 554 |
-
remarkPlugins={[remarkMath]}
|
| 555 |
-
rehypePlugins={[rehypeRaw, rehypeKatex]}
|
| 556 |
-
components={{
|
| 557 |
-
h1: ({ children }) => <h1 style={{ fontSize: '1.5rem', fontWeight: 'bold', marginBottom: '1rem', color: '#1a202c' }}>{children}</h1>,
|
| 558 |
-
h2: ({ children }) => <h2 style={{ fontSize: '1.25rem', fontWeight: 'bold', marginBottom: '0.75rem', marginTop: '1.5rem', color: '#1a202c' }}>{children}</h2>,
|
| 559 |
-
h3: ({ children }) => <h3 style={{ fontSize: '1.125rem', fontWeight: 'bold', marginBottom: '0.5rem', marginTop: '1rem', color: '#1a202c' }}>{children}</h3>,
|
| 560 |
-
p: ({ children }) => <p style={{ marginBottom: '0.75rem', color: '#374151', lineHeight: '1.5', fontSize: '0.875rem' }}>{children}</p>,
|
| 561 |
-
hr: () => <hr style={{ margin: '1.5rem 0', borderColor: '#d1d5db' }} />,
|
| 562 |
-
ul: ({ children }) => <ul style={{ marginBottom: '0.75rem', marginLeft: '1.25rem', listStyleType: 'disc', fontSize: '0.875rem' }}>{children}</ul>,
|
| 563 |
-
ol: ({ children }) => <ol style={{ marginBottom: '0.75rem', marginLeft: '1.25rem', listStyleType: 'decimal', fontSize: '0.875rem' }}>{children}</ol>,
|
| 564 |
-
li: ({ children }) => <li style={{ marginBottom: '0.125rem', color: '#374151' }}>{children}</li>,
|
| 565 |
-
blockquote: ({ children }) => (
|
| 566 |
-
<blockquote style={{ borderLeft: '3px solid #3b82f6', paddingLeft: '0.75rem', fontStyle: 'italic', margin: '0.75rem 0', color: '#6b7280', fontSize: '0.875rem' }}>
|
| 567 |
-
{children}
|
| 568 |
-
</blockquote>
|
| 569 |
-
),
|
| 570 |
-
code: ({ inline, children }) =>
|
| 571 |
-
inline ?
|
| 572 |
-
<code style={{ backgroundColor: '#f3f4f6', padding: '0.125rem 0.25rem', borderRadius: '0.25rem', fontSize: '0.75rem', fontFamily: 'monospace' }}>{children}</code> :
|
| 573 |
-
<pre style={{ backgroundColor: '#f3f4f6', padding: '0.75rem', borderRadius: '0.375rem', overflowX: 'auto', margin: '0.75rem 0' }}>
|
| 574 |
-
<code style={{ fontSize: '0.75rem', fontFamily: 'monospace' }}>{children}</code>
|
| 575 |
-
</pre>,
|
| 576 |
-
div: ({ children, style }) => (
|
| 577 |
-
<div style={style}>
|
| 578 |
-
{children}
|
| 579 |
-
</div>
|
| 580 |
-
),
|
| 581 |
-
img: ({ src, alt }) => <ImageComponent src={src} alt={alt} />
|
| 582 |
-
}}
|
| 583 |
-
>
|
| 584 |
-
{highlightedMarkdown}
|
| 585 |
-
</ReactMarkdown>
|
| 586 |
-
</div>
|
| 587 |
-
</div>
|
| 588 |
-
</div>
|
| 589 |
-
|
| 590 |
-
{/* Resizable Divider */}
|
| 591 |
-
<div
|
| 592 |
-
className="flex items-center justify-center cursor-col-resize group transition-all duration-200"
|
| 593 |
-
style={{ width: '8px' }}
|
| 594 |
-
onMouseDown={handleMouseDown}
|
| 595 |
-
>
|
| 596 |
-
{/* Resizable Divider */}
|
| 597 |
-
<div
|
| 598 |
-
className="w-px h-full rounded-full transition-all
|
| 599 |
-
duration-200 group-hover:shadow-lg"
|
| 600 |
-
style={{
|
| 601 |
-
backgroundColor: isDragging ? 'rgba(59, 130, 246, 0.8)' : 'transparent',
|
| 602 |
-
boxShadow: isDragging ? '0 0 8px rgba(59, 130, 246, 0.8)' : 'none'
|
| 603 |
-
}}
|
| 604 |
-
></div>
|
| 605 |
-
</div>
|
| 606 |
-
|
| 607 |
-
{/* Right Panel Container */}
|
| 608 |
-
<div
|
| 609 |
-
className="flex flex-col"
|
| 610 |
-
style={{ width: `${100 - leftPanelWidth}%` }}
|
| 611 |
-
>
|
| 612 |
-
{/* Navigation Bar - Above chunk panel */}
|
| 613 |
-
<div className="flex items-center justify-center gap-4 mb-4 px-4">
|
| 614 |
-
<button
|
| 615 |
-
onClick={goToPrevChunk}
|
| 616 |
-
disabled={currentChunkIndex === 0}
|
| 617 |
-
className="p-3 bg-white hover:bg-gray-50 disabled:opacity-30 disabled:cursor-not-allowed rounded-lg shadow-sm transition-all"
|
| 618 |
-
>
|
| 619 |
-
<svg className="w-5 h-5 text-gray-700" fill="none" stroke="currentColor" viewBox="0 0 24 24" strokeWidth={3}>
|
| 620 |
-
<path strokeLinecap="round" strokeLinejoin="round" d="M15 19l-7-7 7-7" />
|
| 621 |
-
</svg>
|
| 622 |
-
</button>
|
| 623 |
-
|
| 624 |
-
<div className="flex space-x-2">
|
| 625 |
-
{documentData?.chunks?.map((_, index) => (
|
| 626 |
-
<div
|
| 627 |
-
key={index}
|
| 628 |
-
className={`w-3 h-3 rounded-full ${
|
| 629 |
-
chunkStates[index] === 'understood' ? 'bg-green-500' :
|
| 630 |
-
chunkStates[index] === 'skipped' ? 'bg-red-500' :
|
| 631 |
-
chunkStates[index] === 'interactive' ? 'bg-blue-500' :
|
| 632 |
-
index === currentChunkIndex ? 'bg-gray-600' : 'bg-gray-300'
|
| 633 |
-
}`}
|
| 634 |
-
/>
|
| 635 |
-
))}
|
| 636 |
-
</div>
|
| 637 |
-
|
| 638 |
-
<button
|
| 639 |
-
onClick={goToNextChunk}
|
| 640 |
-
disabled={!documentData?.chunks || currentChunkIndex === documentData.chunks.length - 1}
|
| 641 |
-
className="p-3 bg-white hover:bg-gray-50 disabled:opacity-30 disabled:cursor-not-allowed rounded-lg shadow-sm transition-all"
|
| 642 |
-
>
|
| 643 |
-
<svg className="w-5 h-5 text-gray-700" fill="none" stroke="currentColor" viewBox="0 0 24 24" strokeWidth={3}>
|
| 644 |
-
<path strokeLinecap="round" strokeLinejoin="round" d="M9 5l7 7-7 7" />
|
| 645 |
-
</svg>
|
| 646 |
-
</button>
|
| 647 |
-
</div>
|
| 648 |
-
|
| 649 |
-
{/* Chunk Panel */}
|
| 650 |
-
{/* Chunk Header - Left aligned title only */}
|
| 651 |
-
<div className="px-6 py-4 flex-shrink-0 bg-white rounded-t-lg border-b border-gray-200 z-10">
|
| 652 |
-
<div className="flex items-center justify-between">
|
| 653 |
-
<button
|
| 654 |
-
onClick={() => setChunkExpanded(!chunkExpanded)}
|
| 655 |
-
className="flex items-center hover:bg-gray-50 py-2 px-3 rounded-lg transition-all -ml-3"
|
| 656 |
-
>
|
| 657 |
-
<span className="font-semibold text-gray-900 text-left">
|
| 658 |
-
{documentData?.chunks?.[currentChunkIndex]?.topic || "Loading..."}
|
| 659 |
-
</span>
|
| 660 |
-
<span className="text-gray-400 ml-3">
|
| 661 |
-
{chunkExpanded ? '▲' : '▼'}
|
| 662 |
-
</span>
|
| 663 |
-
</button>
|
| 664 |
-
|
| 665 |
-
<button
|
| 666 |
-
onClick={markChunkUnderstood}
|
| 667 |
-
className="py-2 px-4 bg-gray-50 hover:bg-gray-100 text-gray-600 rounded-lg transition-all text-sm"
|
| 668 |
-
>
|
| 669 |
-
✓
|
| 670 |
-
</button>
|
| 671 |
-
</div>
|
| 672 |
-
|
| 673 |
-
{/* Expandable Chunk Content - in header area */}
|
| 674 |
-
{chunkExpanded && documentData?.chunks?.[currentChunkIndex] && (
|
| 675 |
-
<div className="prose prose-sm max-w-none">
|
| 676 |
-
<ReactMarkdown
|
| 677 |
-
remarkPlugins={[remarkMath]}
|
| 678 |
-
rehypePlugins={[rehypeRaw, rehypeKatex]}
|
| 679 |
-
components={{
|
| 680 |
-
h1: ({ children }) => <h1 style={{ fontSize: '1.25rem', fontWeight: 'bold', marginBottom: '0.75rem', color: '#1a202c' }}>{children}</h1>,
|
| 681 |
-
h2: ({ children }) => <h2 style={{ fontSize: '1.125rem', fontWeight: 'bold', marginBottom: '0.5rem', marginTop: '1rem', color: '#1a202c' }}>{children}</h2>,
|
| 682 |
-
h3: ({ children }) => <h3 style={{ fontSize: '1rem', fontWeight: 'bold', marginBottom: '0.5rem', marginTop: '0.75rem', color: '#1a202c' }}>{children}</h3>,
|
| 683 |
-
p: ({ children }) => <p style={{ marginBottom: '0.5rem', color: '#374151', lineHeight: '1.4', fontSize: '0.875rem' }}>{children}</p>,
|
| 684 |
-
hr: () => <hr style={{ margin: '1rem 0', borderColor: '#d1d5db' }} />,
|
| 685 |
-
ul: ({ children }) => <ul style={{ marginBottom: '0.5rem', marginLeft: '1rem', listStyleType: 'disc', fontSize: '0.875rem' }}>{children}</ul>,
|
| 686 |
-
ol: ({ children }) => <ol style={{ marginBottom: '0.5rem', marginLeft: '1rem', listStyleType: 'decimal', fontSize: '0.875rem' }}>{children}</ol>,
|
| 687 |
-
li: ({ children }) => <li style={{ marginBottom: '0.125rem', color: '#374151' }}>{children}</li>,
|
| 688 |
-
blockquote: ({ children }) => (
|
| 689 |
-
<blockquote style={{ borderLeft: '2px solid #9ca3af', paddingLeft: '0.5rem', fontStyle: 'italic', margin: '0.5rem 0', color: '#6b7280', fontSize: '0.875rem' }}>
|
| 690 |
-
{children}
|
| 691 |
-
</blockquote>
|
| 692 |
-
),
|
| 693 |
-
code: ({ inline, children }) =>
|
| 694 |
-
inline ?
|
| 695 |
-
<code style={{ backgroundColor: '#f3f4f6', padding: '0.125rem 0.25rem', borderRadius: '0.25rem', fontSize: '0.75rem', fontFamily: 'monospace' }}>{children}</code> :
|
| 696 |
-
<pre style={{ backgroundColor: '#f3f4f6', padding: '0.5rem', borderRadius: '0.25rem', overflowX: 'auto', margin: '0.5rem 0' }}>
|
| 697 |
-
<code style={{ fontSize: '0.75rem', fontFamily: 'monospace' }}>{children}</code>
|
| 698 |
-
</pre>,
|
| 699 |
-
img: ({ src, alt }) => <ImageComponent src={src} alt={alt} />
|
| 700 |
-
}}
|
| 701 |
-
>
|
| 702 |
-
{documentData.markdown.slice(
|
| 703 |
-
documentData.chunks[currentChunkIndex].start_position,
|
| 704 |
-
documentData.chunks[currentChunkIndex].end_position
|
| 705 |
-
)}
|
| 706 |
-
</ReactMarkdown>
|
| 707 |
-
</div>
|
| 708 |
-
)}
|
| 709 |
-
|
| 710 |
-
|
| 711 |
-
</div>
|
| 712 |
-
|
| 713 |
-
|
| 714 |
-
{/* Content Area */}
|
| 715 |
-
<div className="flex-1 flex flex-col min-h-0">
|
| 716 |
-
{/* Action Buttons */}
|
| 717 |
-
{chunkStates[currentChunkIndex] !== 'interactive' && (
|
| 718 |
-
<div className="flex-shrink-0 p-6 border-b border-gray-200">
|
| 719 |
-
<div className="flex gap-3">
|
| 720 |
-
<button
|
| 721 |
-
onClick={skipChunk}
|
| 722 |
-
className="flex-1 py-3 bg-gray-50 hover:bg-gray-100 text-gray-600 rounded-lg transition-all"
|
| 723 |
-
>
|
| 724 |
-
✕
|
| 725 |
-
</button>
|
| 726 |
-
|
| 727 |
-
<button
|
| 728 |
-
onClick={startInteractiveLesson}
|
| 729 |
-
disabled={chatLoading}
|
| 730 |
-
className="flex-1 py-3 bg-gray-50 hover:bg-gray-100 disabled:opacity-50 text-gray-600 rounded-lg transition-all"
|
| 731 |
-
>
|
| 732 |
-
{chatLoading ? '...' : 'Start'}
|
| 733 |
-
</button>
|
| 734 |
-
|
| 735 |
-
<button
|
| 736 |
-
onClick={markChunkUnderstood}
|
| 737 |
-
className="flex-1 py-3 bg-gray-50 hover:bg-gray-100 text-gray-600 rounded-lg transition-all"
|
| 738 |
-
>
|
| 739 |
-
✓
|
| 740 |
-
</button>
|
| 741 |
-
</div>
|
| 742 |
-
</div>
|
| 743 |
-
)}
|
| 744 |
-
|
| 745 |
-
{/* Chat Area - sandwich layout when interactive */}
|
| 746 |
-
{chunkStates[currentChunkIndex] === 'interactive' && (
|
| 747 |
-
<div className="flex-1 flex flex-col min-h-0">
|
| 748 |
-
{/* Chat Messages - scrollable middle layer */}
|
| 749 |
-
<div className="bg-white flex-1 overflow-y-auto space-y-4 px-6 py-2">
|
| 750 |
-
{(chatMessages[currentChunkIndex] || []).map((message, index) => (
|
| 751 |
-
message.type === 'user' ? (
|
| 752 |
-
<div
|
| 753 |
-
key={index}
|
| 754 |
-
className="w-full bg-gray-50 border border-gray-200 rounded-lg p-4 shadow-sm"
|
| 755 |
-
>
|
| 756 |
-
<div className="text-xs font-medium mb-2 text-gray-600">
|
| 757 |
-
You
|
| 758 |
-
</div>
|
| 759 |
-
<div className="prose prose-sm max-w-none">
|
| 760 |
-
<ReactMarkdown
|
| 761 |
-
remarkPlugins={[remarkMath]}
|
| 762 |
-
rehypePlugins={[rehypeRaw, rehypeKatex]}
|
| 763 |
-
components={{
|
| 764 |
-
p: ({ children }) => <p className="mb-2 text-gray-800 leading-relaxed">{children}</p>,
|
| 765 |
-
ul: ({ children }) => <ul className="mb-2 ml-4 list-disc">{children}</ul>,
|
| 766 |
-
ol: ({ children }) => <ol className="mb-2 ml-4 list-decimal">{children}</ol>,
|
| 767 |
-
li: ({ children }) => <li className="mb-1 text-gray-800">{children}</li>,
|
| 768 |
-
strong: ({ children }) => <strong className="font-semibold text-gray-900">{children}</strong>,
|
| 769 |
-
em: ({ children }) => <em className="italic">{children}</em>,
|
| 770 |
-
code: ({ inline, children }) =>
|
| 771 |
-
inline ?
|
| 772 |
-
<code className="bg-gray-100 px-1 py-0.5 rounded text-sm font-mono">{children}</code> :
|
| 773 |
-
<pre className="bg-gray-100 p-2 rounded overflow-x-auto my-2">
|
| 774 |
-
<code className="text-sm font-mono">{children}</code>
|
| 775 |
-
</pre>,
|
| 776 |
-
blockquote: ({ children }) => (
|
| 777 |
-
<blockquote className="border-l-4 border-blue-200 pl-4 italic text-gray-700 my-2">
|
| 778 |
-
{children}
|
| 779 |
-
</blockquote>
|
| 780 |
-
)
|
| 781 |
-
}}
|
| 782 |
-
>
|
| 783 |
-
{message.text}
|
| 784 |
-
</ReactMarkdown>
|
| 785 |
-
</div>
|
| 786 |
-
</div>
|
| 787 |
-
) : (
|
| 788 |
-
<div key={index} className="w-full py-4">
|
| 789 |
-
<div className="prose prose-sm max-w-none">
|
| 790 |
-
<ReactMarkdown
|
| 791 |
-
remarkPlugins={[remarkMath]}
|
| 792 |
-
rehypePlugins={[rehypeRaw, rehypeKatex]}
|
| 793 |
-
components={{
|
| 794 |
-
p: ({ children }) => <p className="mb-2 text-gray-800 leading-relaxed">{children}</p>,
|
| 795 |
-
ul: ({ children }) => <ul className="mb-2 ml-4 list-disc">{children}</ul>,
|
| 796 |
-
ol: ({ children }) => <ol className="mb-2 ml-4 list-decimal">{children}</ol>,
|
| 797 |
-
li: ({ children }) => <li className="mb-1 text-gray-800">{children}</li>,
|
| 798 |
-
strong: ({ children }) => <strong className="font-semibold text-gray-900">{children}</strong>,
|
| 799 |
-
em: ({ children }) => <em className="italic">{children}</em>,
|
| 800 |
-
code: ({ inline, children }) =>
|
| 801 |
-
inline ?
|
| 802 |
-
<code className="bg-gray-100 px-1 py-0.5 rounded text-sm font-mono">{children}</code> :
|
| 803 |
-
<pre className="bg-gray-100 p-2 rounded overflow-x-auto my-2">
|
| 804 |
-
<code className="text-sm font-mono">{children}</code>
|
| 805 |
-
</pre>,
|
| 806 |
-
blockquote: ({ children }) => (
|
| 807 |
-
<blockquote className="border-l-4 border-blue-200 pl-4 italic text-gray-700 my-2">
|
| 808 |
-
{children}
|
| 809 |
-
</blockquote>
|
| 810 |
-
)
|
| 811 |
-
}}
|
| 812 |
-
>
|
| 813 |
-
{message.text}
|
| 814 |
-
</ReactMarkdown>
|
| 815 |
-
</div>
|
| 816 |
-
</div>
|
| 817 |
-
)
|
| 818 |
-
))}
|
| 819 |
-
|
| 820 |
-
{/* Typing animation message */}
|
| 821 |
-
{typingMessage && (
|
| 822 |
-
<div className="w-full py-4">
|
| 823 |
-
<div className="prose prose-sm max-w-none">
|
| 824 |
-
<ReactMarkdown
|
| 825 |
-
remarkPlugins={[remarkMath]}
|
| 826 |
-
rehypePlugins={[rehypeRaw, rehypeKatex]}
|
| 827 |
-
components={{
|
| 828 |
-
p: ({ children }) => <p className="mb-2 text-gray-800 leading-relaxed">{children}</p>,
|
| 829 |
-
ul: ({ children }) => <ul className="mb-2 ml-4 list-disc">{children}</ul>,
|
| 830 |
-
ol: ({ children }) => <ol className="mb-2 ml-4 list-decimal">{children}</ol>,
|
| 831 |
-
li: ({ children }) => <li className="mb-1 text-gray-800">{children}</li>,
|
| 832 |
-
strong: ({ children }) => <strong className="font-semibold text-gray-900">{children}</strong>,
|
| 833 |
-
em: ({ children }) => <em className="italic">{children}</em>,
|
| 834 |
-
code: ({ inline, children }) =>
|
| 835 |
-
inline ?
|
| 836 |
-
<code className="bg-gray-100 px-1 py-0.5 rounded text-sm font-mono">{children}</code> :
|
| 837 |
-
<pre className="bg-gray-100 p-2 rounded overflow-x-auto my-2">
|
| 838 |
-
<code className="text-sm font-mono">{children}</code>
|
| 839 |
-
</pre>,
|
| 840 |
-
blockquote: ({ children }) => (
|
| 841 |
-
<blockquote className="border-l-4 border-blue-200 pl-4 italic text-gray-700 my-2">
|
| 842 |
-
{children}
|
| 843 |
-
</blockquote>
|
| 844 |
-
)
|
| 845 |
-
}}
|
| 846 |
-
>
|
| 847 |
-
{typingMessage}
|
| 848 |
-
</ReactMarkdown>
|
| 849 |
-
</div>
|
| 850 |
-
</div>
|
| 851 |
-
)}
|
| 852 |
-
|
| 853 |
-
{/* Loading dots */}
|
| 854 |
-
{chatLoading && (
|
| 855 |
-
<div className="w-full py-4">
|
| 856 |
-
<div className="flex space-x-1">
|
| 857 |
-
<div className="w-2 h-2 bg-gray-400 rounded-full animate-bounce"></div>
|
| 858 |
-
<div className="w-2 h-2 bg-gray-400 rounded-full animate-bounce" style={{animationDelay: '0.1s'}}></div>
|
| 859 |
-
<div className="w-2 h-2 bg-gray-400 rounded-full animate-bounce" style={{animationDelay: '0.2s'}}></div>
|
| 860 |
-
</div>
|
| 861 |
-
</div>
|
| 862 |
-
)}
|
| 863 |
-
</div>
|
| 864 |
-
|
| 865 |
-
{/* Chat Input - sticky at bottom */}
|
| 866 |
-
<div className="flex-shrink-0 bg-white border-t border-gray-200 p-6">
|
| 867 |
-
<div className="flex gap-2 mb-3">
|
| 868 |
-
<input
|
| 869 |
-
type="text"
|
| 870 |
-
value={userInput}
|
| 871 |
-
onChange={(e) => setUserInput(e.target.value)}
|
| 872 |
-
placeholder="Type your response..."
|
| 873 |
-
className="flex-1 px-3 py-2 border border-gray-200 rounded-lg text-sm focus:outline-none focus:ring-1 focus:ring-gray-300"
|
| 874 |
-
/>
|
| 875 |
-
<button className="px-4 py-2 bg-gray-50 hover:bg-gray-100 text-gray-600 rounded-lg transition-all">
|
| 876 |
-
→
|
| 877 |
-
</button>
|
| 878 |
-
</div>
|
| 879 |
-
|
| 880 |
-
</div>
|
| 881 |
-
</div>
|
| 882 |
-
)}
|
| 883 |
-
</div>
|
| 884 |
-
</div>
|
| 885 |
-
</div>
|
| 886 |
-
);
|
| 887 |
-
}
|
| 888 |
-
|
| 889 |
-
export default DocumentProcessor;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
frontend/src/components/ImageComponent.jsx
DELETED
|
@@ -1,115 +0,0 @@
|
|
| 1 |
-
import { useState, useEffect, memo } from 'react';
|
| 2 |
-
|
| 3 |
-
/**
|
| 4 |
-
* ImageComponent - Handles loading and displaying images from the backend
|
| 5 |
-
*
|
| 6 |
-
* Props:
|
| 7 |
-
* - src: The image ID to fetch
|
| 8 |
-
* - alt: Alt text for the image
|
| 9 |
-
* - fileId: The document file ID (for fetching the image)
|
| 10 |
-
* - imageCache: Object containing cached images
|
| 11 |
-
* - onImageCached: Callback when image is successfully cached
|
| 12 |
-
*/
|
| 13 |
-
const ImageComponent = memo(({ src, alt, fileId, imageCache, onImageCached }) => {
|
| 14 |
-
// Local state for this specific image
|
| 15 |
-
const [imageSrc, setImageSrc] = useState(null);
|
| 16 |
-
const [loading, setLoading] = useState(true);
|
| 17 |
-
|
| 18 |
-
useEffect(() => {
|
| 19 |
-
// Only proceed if we have the required data
|
| 20 |
-
if (!fileId || !src) {
|
| 21 |
-
setLoading(false);
|
| 22 |
-
return;
|
| 23 |
-
}
|
| 24 |
-
|
| 25 |
-
// Check if image is already cached
|
| 26 |
-
if (imageCache && imageCache[src]) {
|
| 27 |
-
setImageSrc(imageCache[src]);
|
| 28 |
-
setLoading(false);
|
| 29 |
-
return;
|
| 30 |
-
}
|
| 31 |
-
|
| 32 |
-
// Fetch the image from backend
|
| 33 |
-
const fetchImage = async () => {
|
| 34 |
-
try {
|
| 35 |
-
const response = await fetch(`/get_image/${fileId}/${src}`);
|
| 36 |
-
if (response.ok) {
|
| 37 |
-
const data = await response.json();
|
| 38 |
-
const imageData = data.image_base64;
|
| 39 |
-
|
| 40 |
-
// Set the image for display
|
| 41 |
-
setImageSrc(imageData);
|
| 42 |
-
|
| 43 |
-
// Notify parent component to cache this image
|
| 44 |
-
if (onImageCached) {
|
| 45 |
-
onImageCached(src, imageData);
|
| 46 |
-
}
|
| 47 |
-
}
|
| 48 |
-
} catch (error) {
|
| 49 |
-
console.error('Error fetching image:', error);
|
| 50 |
-
} finally {
|
| 51 |
-
setLoading(false);
|
| 52 |
-
}
|
| 53 |
-
};
|
| 54 |
-
|
| 55 |
-
fetchImage();
|
| 56 |
-
}, [src, fileId, imageCache, onImageCached]);
|
| 57 |
-
|
| 58 |
-
// Show loading state
|
| 59 |
-
if (loading) {
|
| 60 |
-
return (
|
| 61 |
-
<span style={{
|
| 62 |
-
display: 'inline-block',
|
| 63 |
-
width: '100%',
|
| 64 |
-
height: '200px',
|
| 65 |
-
backgroundColor: '#f3f4f6',
|
| 66 |
-
textAlign: 'center',
|
| 67 |
-
lineHeight: '200px',
|
| 68 |
-
margin: '1rem 0',
|
| 69 |
-
borderRadius: '0.5rem',
|
| 70 |
-
color: '#6b7280'
|
| 71 |
-
}}>
|
| 72 |
-
Loading image...
|
| 73 |
-
</span>
|
| 74 |
-
);
|
| 75 |
-
}
|
| 76 |
-
|
| 77 |
-
// Show error state if image couldn't be loaded
|
| 78 |
-
if (!imageSrc) {
|
| 79 |
-
return (
|
| 80 |
-
<span style={{
|
| 81 |
-
display: 'inline-block',
|
| 82 |
-
width: '100%',
|
| 83 |
-
height: '200px',
|
| 84 |
-
backgroundColor: '#fef2f2',
|
| 85 |
-
textAlign: 'center',
|
| 86 |
-
lineHeight: '200px',
|
| 87 |
-
margin: '1rem 0',
|
| 88 |
-
borderRadius: '0.5rem',
|
| 89 |
-
border: '1px solid #fecaca',
|
| 90 |
-
color: '#dc2626'
|
| 91 |
-
}}>
|
| 92 |
-
Image not found: {alt || src}
|
| 93 |
-
</span>
|
| 94 |
-
);
|
| 95 |
-
}
|
| 96 |
-
|
| 97 |
-
// Render the actual image
|
| 98 |
-
return (
|
| 99 |
-
<img
|
| 100 |
-
src={imageSrc}
|
| 101 |
-
alt={alt || 'Document image'}
|
| 102 |
-
style={{
|
| 103 |
-
display: 'block',
|
| 104 |
-
maxWidth: '100%',
|
| 105 |
-
height: 'auto',
|
| 106 |
-
margin: '1.5rem auto'
|
| 107 |
-
}}
|
| 108 |
-
/>
|
| 109 |
-
);
|
| 110 |
-
});
|
| 111 |
-
|
| 112 |
-
// Set display name for debugging
|
| 113 |
-
ImageComponent.displayName = 'ImageComponent';
|
| 114 |
-
|
| 115 |
-
export default ImageComponent;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
frontend/src/components/LoadingAnimation.jsx
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
-
const LoadingAnimation = ({ uploadProgress
|
| 2 |
<div className="flex flex-col items-center justify-center min-h-screen bg-gray-50">
|
| 3 |
<div className="text-center max-w-md">
|
| 4 |
<div className="mb-8">
|
| 5 |
<div className="w-16 h-16 border-4 border-blue-500 border-t-transparent rounded-full animate-spin mx-auto mb-4"></div>
|
| 6 |
<h2 className="text-2xl font-bold text-gray-900 mb-2">Processing Your Document</h2>
|
| 7 |
-
<p className="text-gray-600">
|
| 8 |
</div>
|
| 9 |
|
| 10 |
{/* Upload Progress */}
|
|
@@ -21,22 +21,8 @@ const LoadingAnimation = ({ uploadProgress, ocrProgress }) => (
|
|
| 21 |
</div>
|
| 22 |
</div>
|
| 23 |
|
| 24 |
-
{/* OCR Progress */}
|
| 25 |
-
<div className="mb-6">
|
| 26 |
-
<div className="flex justify-between text-sm text-gray-600 mb-1">
|
| 27 |
-
<span>Processing with AI</span>
|
| 28 |
-
<span>{ocrProgress}%</span>
|
| 29 |
-
</div>
|
| 30 |
-
<div className="w-full bg-gray-200 rounded-full h-2">
|
| 31 |
-
<div
|
| 32 |
-
className="bg-green-500 h-2 rounded-full transition-all duration-300"
|
| 33 |
-
style={{ width: `${ocrProgress}%` }}
|
| 34 |
-
></div>
|
| 35 |
-
</div>
|
| 36 |
-
</div>
|
| 37 |
-
|
| 38 |
<p className="text-sm text-gray-500">
|
| 39 |
-
|
| 40 |
</p>
|
| 41 |
</div>
|
| 42 |
</div>
|
|
|
|
| 1 |
+
const LoadingAnimation = ({ uploadProgress }) => (
|
| 2 |
<div className="flex flex-col items-center justify-center min-h-screen bg-gray-50">
|
| 3 |
<div className="text-center max-w-md">
|
| 4 |
<div className="mb-8">
|
| 5 |
<div className="w-16 h-16 border-4 border-blue-500 border-t-transparent rounded-full animate-spin mx-auto mb-4"></div>
|
| 6 |
<h2 className="text-2xl font-bold text-gray-900 mb-2">Processing Your Document</h2>
|
| 7 |
+
<p className="text-gray-600">Uploading your PDF...</p>
|
| 8 |
</div>
|
| 9 |
|
| 10 |
{/* Upload Progress */}
|
|
|
|
| 21 |
</div>
|
| 22 |
</div>
|
| 23 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
<p className="text-sm text-gray-500">
|
| 25 |
+
Preparing your document for viewing...
|
| 26 |
</p>
|
| 27 |
</div>
|
| 28 |
</div>
|
frontend/src/components/SimpleChat.jsx
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { useState } from 'react';
|
| 2 |
+
|
| 3 |
+
const SimpleChat = () => {
|
| 4 |
+
const [messages, setMessages] = useState([
|
| 5 |
+
{
|
| 6 |
+
id: 1,
|
| 7 |
+
role: 'assistant',
|
| 8 |
+
content: 'Hi! Ask me anything about this section.'
|
| 9 |
+
}
|
| 10 |
+
]);
|
| 11 |
+
const [input, setInput] = useState('');
|
| 12 |
+
const [isLoading, setIsLoading] = useState(false);
|
| 13 |
+
|
| 14 |
+
const sendMessage = async (e) => {
|
| 15 |
+
e.preventDefault();
|
| 16 |
+
if (!input.trim() || isLoading) return;
|
| 17 |
+
|
| 18 |
+
const userMessage = {
|
| 19 |
+
id: Date.now(),
|
| 20 |
+
role: 'user',
|
| 21 |
+
content: input.trim()
|
| 22 |
+
};
|
| 23 |
+
|
| 24 |
+
setMessages(prev => [...prev, userMessage]);
|
| 25 |
+
setInput('');
|
| 26 |
+
setIsLoading(true);
|
| 27 |
+
|
| 28 |
+
try {
|
| 29 |
+
const response = await fetch('/api/chat', {
|
| 30 |
+
method: 'POST',
|
| 31 |
+
headers: { 'Content-Type': 'application/json' },
|
| 32 |
+
body: JSON.stringify({
|
| 33 |
+
messages: [...messages, userMessage].map(msg => ({
|
| 34 |
+
role: msg.role,
|
| 35 |
+
content: msg.content
|
| 36 |
+
}))
|
| 37 |
+
})
|
| 38 |
+
});
|
| 39 |
+
|
| 40 |
+
const data = await response.json();
|
| 41 |
+
|
| 42 |
+
setMessages(prev => [...prev, {
|
| 43 |
+
id: Date.now() + 1,
|
| 44 |
+
role: 'assistant',
|
| 45 |
+
content: data.content || data.message || 'Sorry, no response received.'
|
| 46 |
+
}]);
|
| 47 |
+
} catch (error) {
|
| 48 |
+
console.error('Error:', error);
|
| 49 |
+
setMessages(prev => [...prev, {
|
| 50 |
+
id: Date.now() + 1,
|
| 51 |
+
role: 'assistant',
|
| 52 |
+
content: 'Sorry, something went wrong. Please try again.'
|
| 53 |
+
}]);
|
| 54 |
+
} finally {
|
| 55 |
+
setIsLoading(false);
|
| 56 |
+
}
|
| 57 |
+
};
|
| 58 |
+
|
| 59 |
+
return (
|
| 60 |
+
<div className="flex flex-col h-full">
|
| 61 |
+
{/* Messages */}
|
| 62 |
+
<div className="flex-1 overflow-y-auto p-4 space-y-3">
|
| 63 |
+
{messages.map(message => (
|
| 64 |
+
<div
|
| 65 |
+
key={message.id}
|
| 66 |
+
className={`flex ${message.role === 'user' ? 'justify-end' : 'justify-start'}`}
|
| 67 |
+
>
|
| 68 |
+
<div
|
| 69 |
+
className={`max-w-[70%] p-3 rounded-lg ${
|
| 70 |
+
message.role === 'user'
|
| 71 |
+
? 'bg-blue-500 text-white'
|
| 72 |
+
: 'bg-gray-100 text-gray-900'
|
| 73 |
+
}`}
|
| 74 |
+
>
|
| 75 |
+
{message.content}
|
| 76 |
+
</div>
|
| 77 |
+
</div>
|
| 78 |
+
))}
|
| 79 |
+
{isLoading && (
|
| 80 |
+
<div className="flex justify-start">
|
| 81 |
+
<div className="bg-gray-100 p-3 rounded-lg">
|
| 82 |
+
<div className="flex space-x-1">
|
| 83 |
+
<div className="w-2 h-2 bg-gray-400 rounded-full animate-bounce"></div>
|
| 84 |
+
<div className="w-2 h-2 bg-gray-400 rounded-full animate-bounce" style={{animationDelay: '0.1s'}}></div>
|
| 85 |
+
<div className="w-2 h-2 bg-gray-400 rounded-full animate-bounce" style={{animationDelay: '0.2s'}}></div>
|
| 86 |
+
</div>
|
| 87 |
+
</div>
|
| 88 |
+
</div>
|
| 89 |
+
)}
|
| 90 |
+
</div>
|
| 91 |
+
|
| 92 |
+
{/* Input */}
|
| 93 |
+
<form onSubmit={sendMessage} className="p-4 border-t">
|
| 94 |
+
<div className="flex space-x-2">
|
| 95 |
+
<input
|
| 96 |
+
type="text"
|
| 97 |
+
value={input}
|
| 98 |
+
onChange={(e) => setInput(e.target.value)}
|
| 99 |
+
placeholder="Type your message..."
|
| 100 |
+
disabled={isLoading}
|
| 101 |
+
className="flex-1 px-3 py-2 border rounded-lg focus:outline-none focus:ring-2 focus:ring-blue-500 disabled:bg-gray-100"
|
| 102 |
+
/>
|
| 103 |
+
<button
|
| 104 |
+
type="submit"
|
| 105 |
+
disabled={!input.trim() || isLoading}
|
| 106 |
+
className="px-4 py-2 bg-blue-500 text-white rounded-lg hover:bg-blue-600 disabled:bg-gray-300 disabled:cursor-not-allowed"
|
| 107 |
+
>
|
| 108 |
+
{isLoading ? '...' : 'Send'}
|
| 109 |
+
</button>
|
| 110 |
+
</div>
|
| 111 |
+
</form>
|
| 112 |
+
</div>
|
| 113 |
+
);
|
| 114 |
+
};
|
| 115 |
+
|
| 116 |
+
export default SimpleChat;
|
frontend/src/components/UploadPage.jsx
DELETED
|
@@ -1,277 +0,0 @@
|
|
| 1 |
-
import { useState, useRef } from 'react';
|
| 2 |
-
import { Document, Page, pdfjs } from 'react-pdf';
|
| 3 |
-
import 'react-pdf/dist/Page/AnnotationLayer.css';
|
| 4 |
-
import 'react-pdf/dist/Page/TextLayer.css';
|
| 5 |
-
|
| 6 |
-
pdfjs.GlobalWorkerOptions.workerSrc = '/pdf.worker.min.js';
|
| 7 |
-
|
| 8 |
-
function UploadPage() {
|
| 9 |
-
const fileInputRef = useRef(null);
|
| 10 |
-
const pdfContainerRef = useRef(null);
|
| 11 |
-
const [selectedFile, setSelectedFile] = useState(null);
|
| 12 |
-
const [numPages, setNumPages] = useState(null);
|
| 13 |
-
const [currentPage, setCurrentPage] = useState(1);
|
| 14 |
-
const [zoomLevel, setZoomLevel] = useState(1);
|
| 15 |
-
const [visiblePages, setVisiblePages] = useState(new Set([1]));
|
| 16 |
-
const [chunks, setChunks] = useState([]);
|
| 17 |
-
const [processing, setProcessing] = useState(false);
|
| 18 |
-
|
| 19 |
-
const handleFileChange = (e) => {
|
| 20 |
-
setSelectedFile(e.target.files[0]);
|
| 21 |
-
setChunks([]); // Clear previous chunks
|
| 22 |
-
};
|
| 23 |
-
|
| 24 |
-
const processPdf = async () => {
|
| 25 |
-
if (!selectedFile) return;
|
| 26 |
-
|
| 27 |
-
setProcessing(true);
|
| 28 |
-
const formData = new FormData();
|
| 29 |
-
formData.append('file', selectedFile);
|
| 30 |
-
|
| 31 |
-
try {
|
| 32 |
-
const response = await fetch('http://localhost:8000/upload_pdf', {
|
| 33 |
-
method: 'POST',
|
| 34 |
-
body: formData,
|
| 35 |
-
});
|
| 36 |
-
|
| 37 |
-
if (response.ok) {
|
| 38 |
-
const data = await response.json();
|
| 39 |
-
// Handle the new response format - create a fake chunk array for now
|
| 40 |
-
setChunks([{
|
| 41 |
-
text: `File processed: ${data.filename}`,
|
| 42 |
-
page_number: 1,
|
| 43 |
-
chunk_type: "info",
|
| 44 |
-
size: data.size,
|
| 45 |
-
has_api_key: data.has_api_key
|
| 46 |
-
}]);
|
| 47 |
-
} else {
|
| 48 |
-
console.error('Failed to process PDF');
|
| 49 |
-
}
|
| 50 |
-
} catch (error) {
|
| 51 |
-
console.error('Error processing PDF:', error);
|
| 52 |
-
} finally {
|
| 53 |
-
setProcessing(false);
|
| 54 |
-
}
|
| 55 |
-
};
|
| 56 |
-
|
| 57 |
-
// Handle scroll to update current page and track visible pages
|
| 58 |
-
const handleScroll = () => {
|
| 59 |
-
if (!pdfContainerRef.current || !numPages) return;
|
| 60 |
-
|
| 61 |
-
const container = pdfContainerRef.current;
|
| 62 |
-
const scrollTop = container.scrollTop;
|
| 63 |
-
const containerHeight = container.clientHeight;
|
| 64 |
-
const totalScrollHeight = container.scrollHeight - containerHeight;
|
| 65 |
-
|
| 66 |
-
// Calculate which page we're viewing based on scroll position
|
| 67 |
-
const scrollPercent = scrollTop / totalScrollHeight;
|
| 68 |
-
const newPage = Math.min(Math.floor(scrollPercent * numPages) + 1, numPages);
|
| 69 |
-
|
| 70 |
-
if (newPage !== currentPage) {
|
| 71 |
-
setCurrentPage(newPage);
|
| 72 |
-
}
|
| 73 |
-
|
| 74 |
-
// Track visible pages based on zoom level (more pages visible when zoomed out)
|
| 75 |
-
const newVisiblePages = new Set();
|
| 76 |
-
const visibleRange = Math.max(1, Math.ceil(2 / zoomLevel)); // More pages when zoomed out
|
| 77 |
-
for (let i = Math.max(1, newPage - visibleRange); i <= Math.min(numPages, newPage + visibleRange); i++) {
|
| 78 |
-
newVisiblePages.add(i);
|
| 79 |
-
}
|
| 80 |
-
|
| 81 |
-
// Update visible pages if changed
|
| 82 |
-
if (newVisiblePages.size !== visiblePages.size ||
|
| 83 |
-
![...newVisiblePages].every(page => visiblePages.has(page))) {
|
| 84 |
-
setVisiblePages(newVisiblePages);
|
| 85 |
-
}
|
| 86 |
-
};
|
| 87 |
-
|
| 88 |
-
// Jump to specific page
|
| 89 |
-
const goToPage = (pageNumber) => {
|
| 90 |
-
if (!pdfContainerRef.current || !numPages) return;
|
| 91 |
-
|
| 92 |
-
// Update visible pages immediately for target page
|
| 93 |
-
const newVisiblePages = new Set();
|
| 94 |
-
const visibleRange = Math.max(1, Math.ceil(2 / zoomLevel)); // More pages when zoomed out
|
| 95 |
-
for (let i = Math.max(1, pageNumber - visibleRange); i <= Math.min(numPages, pageNumber + visibleRange); i++) {
|
| 96 |
-
newVisiblePages.add(i);
|
| 97 |
-
}
|
| 98 |
-
setVisiblePages(newVisiblePages);
|
| 99 |
-
|
| 100 |
-
const container = pdfContainerRef.current;
|
| 101 |
-
const totalScrollHeight = container.scrollHeight - container.clientHeight;
|
| 102 |
-
|
| 103 |
-
// Calculate scroll position for the target page
|
| 104 |
-
const targetScrollPercent = (pageNumber - 1) / numPages;
|
| 105 |
-
const targetScrollTop = targetScrollPercent * totalScrollHeight;
|
| 106 |
-
|
| 107 |
-
container.scrollTo({
|
| 108 |
-
top: targetScrollTop,
|
| 109 |
-
behavior: 'smooth'
|
| 110 |
-
});
|
| 111 |
-
};
|
| 112 |
-
|
| 113 |
-
// Zoom controls
|
| 114 |
-
const zoomIn = () => setZoomLevel(prev => Math.min(prev + 0.25, 3));
|
| 115 |
-
const zoomOut = () => setZoomLevel(prev => Math.max(prev - 0.25, 0.5));
|
| 116 |
-
const resetZoom = () => setZoomLevel(1);
|
| 117 |
-
|
| 118 |
-
return (
|
| 119 |
-
<div className="h-screen bg-gray-50 overflow-hidden">
|
| 120 |
-
{!selectedFile ? (
|
| 121 |
-
// Show upload UI
|
| 122 |
-
<div className="flex items-center justify-center min-h-screen">
|
| 123 |
-
<div className="text-center">
|
| 124 |
-
<h1 className="text-3xl font-bold text-gray-900 mb-4">
|
| 125 |
-
Upload Your PDF
|
| 126 |
-
</h1>
|
| 127 |
-
<p className="text-gray-600 mb-8">
|
| 128 |
-
Click below to upload a PDF and start your deep dive.
|
| 129 |
-
</p>
|
| 130 |
-
<input
|
| 131 |
-
ref={fileInputRef}
|
| 132 |
-
type="file"
|
| 133 |
-
accept=".pdf"
|
| 134 |
-
className="hidden"
|
| 135 |
-
onChange={handleFileChange}
|
| 136 |
-
/>
|
| 137 |
-
<button
|
| 138 |
-
onClick={() => fileInputRef.current.click()}
|
| 139 |
-
className="bg-blue-500 hover:bg-blue-700 text-white font-bold py-2 px-4 rounded"
|
| 140 |
-
>
|
| 141 |
-
Upload PDF
|
| 142 |
-
</button>
|
| 143 |
-
</div>
|
| 144 |
-
</div>
|
| 145 |
-
) : (
|
| 146 |
-
// Show PDF + chat layout
|
| 147 |
-
<div className="flex h-screen">
|
| 148 |
-
<div className="w-2/3 bg-white flex flex-col relative">
|
| 149 |
-
{/* PDF container with scrolling */}
|
| 150 |
-
<div
|
| 151 |
-
ref={pdfContainerRef}
|
| 152 |
-
className="flex-1 overflow-auto flex justify-center bg-gray-100"
|
| 153 |
-
onScroll={handleScroll}
|
| 154 |
-
>
|
| 155 |
-
<div className="py-4">
|
| 156 |
-
<Document
|
| 157 |
-
file={selectedFile}
|
| 158 |
-
onLoadSuccess={({ numPages }) => setNumPages(numPages)}
|
| 159 |
-
>
|
| 160 |
-
{/* Render all pages continuously */}
|
| 161 |
-
{numPages && Array.from(new Array(numPages), (_, index) => {
|
| 162 |
-
const pageNum = index + 1;
|
| 163 |
-
const isVisible = visiblePages.has(pageNum);
|
| 164 |
-
const currentZoom = isVisible ? zoomLevel : 1; // Only zoom visible pages
|
| 165 |
-
|
| 166 |
-
return (
|
| 167 |
-
<div key={pageNum} className="mb-4 flex justify-center">
|
| 168 |
-
<Page
|
| 169 |
-
pageNumber={pageNum}
|
| 170 |
-
width={typeof window !== 'undefined' ? window.innerWidth * 0.66 * 0.9 * currentZoom : 600 * currentZoom}
|
| 171 |
-
/>
|
| 172 |
-
</div>
|
| 173 |
-
);
|
| 174 |
-
})}
|
| 175 |
-
</Document>
|
| 176 |
-
</div>
|
| 177 |
-
</div>
|
| 178 |
-
{/* Pagination overlay - floating pill */}
|
| 179 |
-
{numPages && (
|
| 180 |
-
<div className="absolute bottom-4 left-1/2 transform -translate-x-1/2 z-10">
|
| 181 |
-
<div className="flex items-center bg-gray-800/90 backdrop-blur-sm rounded-full shadow-lg px-3 py-2 space-x-3">
|
| 182 |
-
<button
|
| 183 |
-
onClick={() => goToPage(Math.max(currentPage - 1, 1))}
|
| 184 |
-
disabled={currentPage <= 1}
|
| 185 |
-
className="w-8 h-8 rounded-full bg-gray-600 hover:bg-gray-500 disabled:opacity-30 disabled:cursor-not-allowed flex items-center justify-center transition-colors text-white"
|
| 186 |
-
>
|
| 187 |
-
<svg width="16" height="16" viewBox="0 0 16 16" fill="currentColor">
|
| 188 |
-
<path d="M10 12l-4-4 4-4v8z"/>
|
| 189 |
-
</svg>
|
| 190 |
-
</button>
|
| 191 |
-
|
| 192 |
-
<span className="px-3 py-1 text-sm font-medium text-white min-w-[60px] text-center">
|
| 193 |
-
{currentPage}/{numPages}
|
| 194 |
-
</span>
|
| 195 |
-
|
| 196 |
-
<button
|
| 197 |
-
onClick={() => goToPage(Math.min(currentPage + 1, numPages))}
|
| 198 |
-
disabled={currentPage >= numPages}
|
| 199 |
-
className="w-8 h-8 rounded-full bg-gray-600 hover:bg-gray-500 disabled:opacity-30 disabled:cursor-not-allowed flex items-center justify-center transition-colors text-white"
|
| 200 |
-
>
|
| 201 |
-
<svg width="16" height="16" viewBox="0 0 16 16" fill="currentColor">
|
| 202 |
-
<path d="M6 4l4 4-4 4V4z"/>
|
| 203 |
-
</svg>
|
| 204 |
-
</button>
|
| 205 |
-
</div>
|
| 206 |
-
</div>
|
| 207 |
-
)}
|
| 208 |
-
|
| 209 |
-
{/* Zoom controls overlay - bottom right */}
|
| 210 |
-
{numPages && (
|
| 211 |
-
<div className="absolute bottom-4 right-4 z-10 flex flex-col items-center space-y-2">
|
| 212 |
-
{/* Main zoom pill - vertical */}
|
| 213 |
-
<div className="flex flex-col items-center bg-gray-800/90 backdrop-blur-sm rounded-full shadow-lg px-2 py-2 space-y-1">
|
| 214 |
-
<button
|
| 215 |
-
onClick={zoomIn}
|
| 216 |
-
disabled={zoomLevel >= 3}
|
| 217 |
-
className="w-6 h-6 rounded-full bg-gray-600 hover:bg-gray-500 disabled:opacity-30 disabled:cursor-not-allowed flex items-center justify-center transition-colors text-white"
|
| 218 |
-
>
|
| 219 |
-
<svg width="14" height="14" viewBox="0 0 16 16" fill="currentColor">
|
| 220 |
-
<path d="M8 4v4H4v1h4v4h1V9h4V8H9V4z"/>
|
| 221 |
-
</svg>
|
| 222 |
-
</button>
|
| 223 |
-
|
| 224 |
-
<button
|
| 225 |
-
onClick={zoomOut}
|
| 226 |
-
disabled={zoomLevel <= 0.5}
|
| 227 |
-
className="w-6 h-6 rounded-full bg-gray-600 hover:bg-gray-500 disabled:opacity-30 disabled:cursor-not-allowed flex items-center justify-center transition-colors text-white"
|
| 228 |
-
>
|
| 229 |
-
<svg width="14" height="14" viewBox="0 0 16 16" fill="currentColor">
|
| 230 |
-
<path d="M4 8h8v1H4z"/>
|
| 231 |
-
</svg>
|
| 232 |
-
</button>
|
| 233 |
-
</div>
|
| 234 |
-
|
| 235 |
-
{/* Reset button below */}
|
| 236 |
-
<button
|
| 237 |
-
onClick={resetZoom}
|
| 238 |
-
className="w-10 h-10 bg-gray-700 hover:bg-gray-500 backdrop-blur-sm rounded-full shadow-lg flex items-center justify-center text-white transition-colors"
|
| 239 |
-
>
|
| 240 |
-
<svg width="14" height="14" viewBox="0 0 16 16" fill="currentColor" stroke="currentColor" strokeWidth="0.5">
|
| 241 |
-
<path d="M8 3a5 5 0 1 0 4.546 2.914.5.5 0 0 1 .908-.417A6 6 0 1 1 8 2v1z" strokeWidth="1"/>
|
| 242 |
-
<path d="M8 4.466V.534a.25.25 0 0 1 .41-.192l2.36 1.966c.12.1.12.284 0 .384L8.41 4.658A.25.25 0 0 1 8 4.466z"/>
|
| 243 |
-
</svg>
|
| 244 |
-
</button>
|
| 245 |
-
</div>
|
| 246 |
-
)}
|
| 247 |
-
|
| 248 |
-
</div>
|
| 249 |
-
{/* White separator bar */}
|
| 250 |
-
<div className="w-4 bg-white"></div>
|
| 251 |
-
|
| 252 |
-
<div className="flex-1 bg-gray-100 overflow-auto">
|
| 253 |
-
<div className="p-4">
|
| 254 |
-
<button
|
| 255 |
-
onClick={processPdf}
|
| 256 |
-
disabled={processing}
|
| 257 |
-
className="bg-green-500 hover:bg-green-700 text-white font-bold py-2 px-4 rounded mb-4"
|
| 258 |
-
>
|
| 259 |
-
{processing ? 'Processing...' : 'Process PDF'}
|
| 260 |
-
</button>
|
| 261 |
-
<div>
|
| 262 |
-
{chunks.map((chunk, index) => (
|
| 263 |
-
<div key={index} className="bg-white p-4 rounded-lg shadow mb-4">
|
| 264 |
-
<p className="text-sm text-gray-600">Page: {chunk.page_number}, Type: {chunk.chunk_type}</p>
|
| 265 |
-
<p className="text-gray-800">{chunk.text}</p>
|
| 266 |
-
</div>
|
| 267 |
-
))}
|
| 268 |
-
</div>
|
| 269 |
-
</div>
|
| 270 |
-
</div>
|
| 271 |
-
</div>
|
| 272 |
-
)}
|
| 273 |
-
</div>
|
| 274 |
-
);
|
| 275 |
-
}
|
| 276 |
-
|
| 277 |
-
export default UploadPage;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
frontend/src/hooks/useChat.js
DELETED
|
@@ -1,109 +0,0 @@
|
|
| 1 |
-
import { useState, useRef } from 'react';
|
| 2 |
-
|
| 3 |
-
export const useChat = () => {
|
| 4 |
-
const [chatData, setChatData] = useState({});
|
| 5 |
-
const [chatLoading, setChatLoading] = useState(false);
|
| 6 |
-
const [chatMessages, setChatMessages] = useState({});
|
| 7 |
-
const [userInput, setUserInput] = useState('');
|
| 8 |
-
const [typingMessage, setTypingMessage] = useState('');
|
| 9 |
-
const [typingInterval, setTypingInterval] = useState(null);
|
| 10 |
-
|
| 11 |
-
const typeMessage = (text, callback) => {
|
| 12 |
-
if (typingInterval) {
|
| 13 |
-
clearInterval(typingInterval);
|
| 14 |
-
}
|
| 15 |
-
|
| 16 |
-
setTypingMessage('');
|
| 17 |
-
let currentIndex = 0;
|
| 18 |
-
const typeSpeed = Math.max(1, Math.min(3, 200 / text.length));
|
| 19 |
-
|
| 20 |
-
const interval = setInterval(() => {
|
| 21 |
-
if (currentIndex < text.length) {
|
| 22 |
-
setTypingMessage(text.slice(0, currentIndex + 1));
|
| 23 |
-
currentIndex++;
|
| 24 |
-
} else {
|
| 25 |
-
clearInterval(interval);
|
| 26 |
-
setTypingInterval(null);
|
| 27 |
-
setTypingMessage('');
|
| 28 |
-
callback();
|
| 29 |
-
}
|
| 30 |
-
}, typeSpeed);
|
| 31 |
-
|
| 32 |
-
setTypingInterval(interval);
|
| 33 |
-
};
|
| 34 |
-
|
| 35 |
-
const startChunkLesson = async (chunkIndex, documentData) => {
|
| 36 |
-
if (!documentData || !documentData.chunks[chunkIndex]) return;
|
| 37 |
-
|
| 38 |
-
setChatLoading(true);
|
| 39 |
-
|
| 40 |
-
try {
|
| 41 |
-
const chunk = documentData.chunks[chunkIndex];
|
| 42 |
-
console.log('Starting lesson for chunk:', chunkIndex, chunk);
|
| 43 |
-
console.log('Document data:', documentData.fileId, documentData.markdown?.length);
|
| 44 |
-
|
| 45 |
-
const response = await fetch(`/start_chunk_lesson/${documentData.fileId}/${chunkIndex}`, {
|
| 46 |
-
method: 'POST',
|
| 47 |
-
headers: {
|
| 48 |
-
'Content-Type': 'application/json',
|
| 49 |
-
},
|
| 50 |
-
body: JSON.stringify({
|
| 51 |
-
chunk: chunk,
|
| 52 |
-
document_markdown: documentData.markdown
|
| 53 |
-
})
|
| 54 |
-
});
|
| 55 |
-
|
| 56 |
-
if (!response.ok) {
|
| 57 |
-
const errorData = await response.text();
|
| 58 |
-
console.error('Backend error:', errorData);
|
| 59 |
-
throw new Error(`Failed to start lesson: ${response.status} - ${errorData}`);
|
| 60 |
-
}
|
| 61 |
-
|
| 62 |
-
const lessonData = await response.json();
|
| 63 |
-
setChatData(prev => ({
|
| 64 |
-
...prev,
|
| 65 |
-
[chunkIndex]: {
|
| 66 |
-
...lessonData,
|
| 67 |
-
chunkIndex: chunkIndex,
|
| 68 |
-
chunk: chunk
|
| 69 |
-
}
|
| 70 |
-
}));
|
| 71 |
-
|
| 72 |
-
setChatLoading(false);
|
| 73 |
-
|
| 74 |
-
typeMessage(lessonData.questions, () => {
|
| 75 |
-
setChatMessages(prev => ({
|
| 76 |
-
...prev,
|
| 77 |
-
[chunkIndex]: [
|
| 78 |
-
{ type: 'ai', text: lessonData.questions }
|
| 79 |
-
]
|
| 80 |
-
}));
|
| 81 |
-
});
|
| 82 |
-
|
| 83 |
-
} catch (error) {
|
| 84 |
-
console.error('Error starting lesson:', error);
|
| 85 |
-
alert('Error starting lesson: ' + error.message);
|
| 86 |
-
setChatLoading(false);
|
| 87 |
-
}
|
| 88 |
-
};
|
| 89 |
-
|
| 90 |
-
const clearTypingAnimation = () => {
|
| 91 |
-
if (typingInterval) {
|
| 92 |
-
clearInterval(typingInterval);
|
| 93 |
-
setTypingInterval(null);
|
| 94 |
-
}
|
| 95 |
-
setTypingMessage('');
|
| 96 |
-
};
|
| 97 |
-
|
| 98 |
-
return {
|
| 99 |
-
chatData,
|
| 100 |
-
chatLoading,
|
| 101 |
-
chatMessages,
|
| 102 |
-
userInput,
|
| 103 |
-
typingMessage,
|
| 104 |
-
startChunkLesson,
|
| 105 |
-
clearTypingAnimation,
|
| 106 |
-
setUserInput,
|
| 107 |
-
setChatMessages
|
| 108 |
-
};
|
| 109 |
-
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
frontend/src/hooks/useChunkNavigation.js
CHANGED
|
@@ -11,6 +11,7 @@ export const useChunkNavigation = (documentData, clearTypingAnimation) => {
|
|
| 11 |
clearTypingAnimation();
|
| 12 |
}
|
| 13 |
setCurrentChunkIndex(currentChunkIndex + 1);
|
|
|
|
| 14 |
}
|
| 15 |
};
|
| 16 |
|
|
@@ -20,6 +21,7 @@ export const useChunkNavigation = (documentData, clearTypingAnimation) => {
|
|
| 20 |
clearTypingAnimation();
|
| 21 |
}
|
| 22 |
setCurrentChunkIndex(currentChunkIndex - 1);
|
|
|
|
| 23 |
}
|
| 24 |
};
|
| 25 |
|
|
|
|
| 11 |
clearTypingAnimation();
|
| 12 |
}
|
| 13 |
setCurrentChunkIndex(currentChunkIndex + 1);
|
| 14 |
+
setChunkExpanded(true);
|
| 15 |
}
|
| 16 |
};
|
| 17 |
|
|
|
|
| 21 |
clearTypingAnimation();
|
| 22 |
}
|
| 23 |
setCurrentChunkIndex(currentChunkIndex - 1);
|
| 24 |
+
setChunkExpanded(true);
|
| 25 |
}
|
| 26 |
};
|
| 27 |
|
frontend/src/hooks/useDocumentProcessor.js
CHANGED
|
@@ -1,59 +1,23 @@
|
|
| 1 |
-
import { useState, useRef
|
| 2 |
|
| 3 |
export const useDocumentProcessor = () => {
|
| 4 |
const fileInputRef = useRef(null);
|
| 5 |
const [selectedFile, setSelectedFile] = useState(null);
|
| 6 |
const [processing, setProcessing] = useState(false);
|
| 7 |
const [uploadProgress, setUploadProgress] = useState(0);
|
| 8 |
-
const [ocrProgress, setOcrProgress] = useState(0);
|
| 9 |
const [documentData, setDocumentData] = useState(null);
|
| 10 |
-
const [imageCache, setImageCache] = useState({});
|
| 11 |
-
const imageCacheRef = useRef({});
|
| 12 |
|
| 13 |
const handleFileChange = (e) => {
|
| 14 |
setSelectedFile(e.target.files[0]);
|
| 15 |
setDocumentData(null);
|
| 16 |
setUploadProgress(0);
|
| 17 |
-
setOcrProgress(0);
|
| 18 |
-
setImageCache({});
|
| 19 |
-
imageCacheRef.current = {};
|
| 20 |
};
|
| 21 |
|
| 22 |
-
const fetchImage = useCallback(async (imageId, fileId) => {
|
| 23 |
-
if (imageCacheRef.current[imageId]) {
|
| 24 |
-
return imageCacheRef.current[imageId];
|
| 25 |
-
}
|
| 26 |
-
|
| 27 |
-
try {
|
| 28 |
-
const response = await fetch(`/get_image/${fileId}/${imageId}`);
|
| 29 |
-
if (response.ok) {
|
| 30 |
-
const data = await response.json();
|
| 31 |
-
const imageData = data.image_base64;
|
| 32 |
-
|
| 33 |
-
imageCacheRef.current = {
|
| 34 |
-
...imageCacheRef.current,
|
| 35 |
-
[imageId]: imageData
|
| 36 |
-
};
|
| 37 |
-
|
| 38 |
-
setImageCache(prev => ({
|
| 39 |
-
...prev,
|
| 40 |
-
[imageId]: imageData
|
| 41 |
-
}));
|
| 42 |
-
|
| 43 |
-
return imageData;
|
| 44 |
-
}
|
| 45 |
-
} catch (error) {
|
| 46 |
-
console.error('Error fetching image:', error);
|
| 47 |
-
}
|
| 48 |
-
return null;
|
| 49 |
-
}, []);
|
| 50 |
-
|
| 51 |
const processDocument = async () => {
|
| 52 |
if (!selectedFile) return;
|
| 53 |
|
| 54 |
setProcessing(true);
|
| 55 |
setUploadProgress(0);
|
| 56 |
-
setOcrProgress(0);
|
| 57 |
|
| 58 |
try {
|
| 59 |
// Step 1: Upload PDF
|
|
@@ -67,89 +31,92 @@ export const useDocumentProcessor = () => {
|
|
| 67 |
});
|
| 68 |
|
| 69 |
if (!uploadResponse.ok) {
|
|
|
|
|
|
|
| 70 |
throw new Error('Failed to upload PDF');
|
| 71 |
}
|
| 72 |
|
| 73 |
-
const
|
|
|
|
|
|
|
| 74 |
setUploadProgress(100);
|
| 75 |
|
| 76 |
-
//
|
| 77 |
-
|
| 78 |
-
await new Promise(resolve => setTimeout(resolve, 500));
|
| 79 |
-
|
| 80 |
-
setOcrProgress(60);
|
| 81 |
-
const ocrResponse = await fetch(`/process_ocr/${uploadData.file_id}`);
|
| 82 |
-
|
| 83 |
-
if (!ocrResponse.ok) {
|
| 84 |
-
throw new Error('Failed to process OCR');
|
| 85 |
-
}
|
| 86 |
-
|
| 87 |
-
const ocrData = await ocrResponse.json();
|
| 88 |
-
setOcrProgress(100);
|
| 89 |
|
| 90 |
-
// Use hardcoded chunks for
|
| 91 |
const hardcodedChunks = [
|
| 92 |
-
|
| 93 |
"topic": "Magnetfeldmessung und Hysterese-Analyse",
|
| 94 |
-
"text": "Zu Beginn des Versuchs haben wir mit Hilfe des Teslameters die Magnetfeldstärke B an der Position der Cd-Lampe bei verschiedenen Spulenströmen gemessen. (siehe Messwerte in Tabelle 1 im Laborbuch). In Figure 1 sind die gemessenen Feldstärken als Funktion der Stromstärke aufgetragen.\nAnhand der Fehlerbalken und der praktisch identischen Überlagerung der beiden linearen Fitgeraden für auf- und absteigende Stromstärken, wird deutlich, dass keine Hystereseeffekte vorliegen. Der lineare Fit wurde hierbei nur auf die Stromstärken bis einschl. 10A angewandt, da für größere Stromstärken das Magnetfeld nicht in direktem proportionalen Zusammenhang ansteigt. Dies ist mit Sättigungseffekten der Magnetisierung des Eisenkerns der verwendeten Spule zu erklären."
|
| 95 |
-
|
|
|
|
| 96 |
{
|
| 97 |
"topic": "Qualitative Beobachtung des Zeeman-Effekts",
|
| 98 |
-
"text": "Mit Hilfe der CMOS Kamera wurde das Spektrum des emittierten Lichts der Cadmiumlampe unter Verwendung des Lummer Gehercke Interferometers beobachtet. Die Beobachtungen wurden in longitudinaler und transversaler Richtung zum Magnetfeld durchgeführt."
|
| 99 |
-
|
|
|
|
| 100 |
{
|
| 101 |
"topic": "Zeeman-Effekt: Longitudinale Richtung mit Filtern",
|
| 102 |
-
"text": "## ohne Filter:\n\nEs sind deutlich zwei Linien pro Ordnung zu erkennen. Dies sind die $\\sigma^{+}$und $\\sigma^{-}$Linien. Die $\\pi$ Linie ist in longitudinaler Richtung nicht zu beobachten\n\n## mit $\\lambda / 4$-Plättchen und Polarisationsfilter:\n\nVon der Cadmiumlampe aus betrachtet wird zuerst ein $\\lambda / 4$-Plättchen und danach ein Polarisationsfilter in den Strahlengang gebracht. Je nach Ausrichtung der Filter zueinander wird nun eine der beiden Linien ausgeblendet.\n\n$$\n-45^{\\circ} \\text { Winkel: }\n$$\n\nStehen $\\lambda / 4$-Plättchen und Polarisationsfilter zueinander im $-45^{\\circ}$ Winkel, wird das zirkular polarisierte Licht der $\\sigma^{-}$Linie um $45^{\\circ}$ verschoben linear polarisiert und somit vom Polarisationsfilter abgeschirmt. Folglich ist in dieser Konstellation nur die linke der beiden $\\sigma$ Linien zu beobachten.\n\n$$\n+45^{\\circ} \\text { Winkel: }\n$$\n\nStehen $\\lambda / 4$-Plättchen und Polarisationsfilter zueinander im $+45^{\\circ}$ Winkel, ist nach analogem Prinzip wie zuvor nur die rechte Linie auf dem Kamerabild zu beobachten."
|
| 103 |
-
|
|
|
|
| 104 |
{
|
| 105 |
"topic": "Zeeman-Effekt: Transversale Richtung und Polarisation",
|
| 106 |
-
"text": "## ohne Filter:\n\nEs sind deutlich drei Linien pro Ordnung zu erkennen. Dies sind die $\\sigma^{+}, \\pi$ und $\\sigma^{-}$Linien.\n\n## mit Polarisationsfilter horizontal (in B-Feld Richtung):\n\nDie beiden $\\sigma$-Linien sind vollständig ausgeblendet. Die $\\pi-$ Linie ist deutlich sichtbar.\nmit Polarisationsfilter vertikal $\\left(90^{\\circ}\\right.$ zu B-Feld Richtung):\nDie beiden $\\sigma$-Linien sind klar sichtbar. Die $\\pi$-Linie ist ausgeblendet.\n\nWie in Figure 3 gut zu erkennen ist, sind die ausgeblendeten Linien in beiden Konfigurationen weiterhin leicht sichtbar. Dies ist auf das nicht perfekt homogene Magnetfeld am Ort der Ca-Lampe zurückzuführen. Das Licht ist also nicht perfekt zirkular bzw. in B-Feld Richtung polarisiert, weshalb ein vollständiges Ausblenden im Experiment nicht zu beobachten ist."
|
| 107 |
-
|
|
|
|
| 108 |
{
|
| 109 |
"topic": "Bestimmung des Zeemanshifts und Datenaufbereitung",
|
| 110 |
-
"text": "Die Messdaten bei verschiedene Stromstärken wurden jeweils in einem Plot dargestellt. Um für den Fit möglichst saubere Messkurven des Spektrums zu verwenden, wurde die Messreihe bei $I=8 A$ nicht in die Datenauswertung einbezogen, da die Aufspaltung der Cadmiumlinie nur schwer zu beobachten war. Das gleich gilt für die 8. Interferenzodnung, die nicht berücksichtigt wurde. Für die Datenauswertung fließen also die Nullte bis 7. Ordnung jeweils bei 9 bis 13 Ampere ein.\nAls Funktion um die Messdaten zu fitten wurde ein Pseudo-Voigt-Profil verwendet. Die drei Kurven einer Ordnung wurden hierbei gemeinsam mit der Summe dreier Pseudo-Voigt-Profile gefittet. In Figure 4 sind exemplarisch anhand der Daten für $I=12 A$ die Messdaten und der abschnittsweise Fit zu erkennen."
|
| 111 |
-
|
|
|
|
| 112 |
{
|
| 113 |
"topic": "Fehleranalyse der Fitparameter und Verzerrungseffekte",
|
| 114 |
-
"text": "Anhand der Fitparameter wird die Position der $\\sigma$ und $\\pi$ Linien bestimmt. Die Fehler der Fitparameter sind extrem klein $(\\approx 0,1 p x)$ und eigenen sich nicht als realistische Fehler für unsere weitere Rechnung. Als minimalen Fehler nehmen wir daher die Auflösung der Kamera an ( $1 p x$ ) und skalieren alle Fehler so, dass der kleineste Fehler exakt $1 p x$ beträgt. Die anderen Fehler sind dann entsprechend linear skaliert größer. Dies berücksichtigt die unterschiedliche Qualität der Fits auf unterschiedliche Interferenz-Ordnungen, bringt die Fehler aber in einen experimentell realistischen Bereich.\nFür die Berechnung des Zeemanshifts müssen die Verzerrungseffekte der Lummer-Gehrcke-Platte beachtet werden. Hierfür wird die Position der $\\pi$-Linien gegen der Interferenzordnung $k$ der entsprechenden Linie aufgetragen. Der funktionelle Zusammenhang dieser beiden Größen wird durch eine quadratische Funktion $k=f(a)$ approximiert
|
| 115 |
-
|
|
|
|
| 116 |
{
|
| 117 |
"topic": "Berechnung der Wellenlängen- und Energieverschiebung",
|
| 118 |
-
"text": "Die Differenz zur ganzzahligen Ordnung der zugehörigen $\\pi$-Linie ergibt $\\delta k$. Für eine (kleine) Wellenlängenverschiebung $\\delta \\lambda$ gilt:\n\n$$\\n\\delta \\lambda=\\frac{\\delta k}{\\Delta k} \\cdot \\frac{\\lambda^{2}}{2 d \\cdot \\sqrt{n^{2}-1}}\n$$\n\nFür den Abstand $\\Delta k$ zweier Ordnungen gilt $\\Delta k=1$. Für die Wellenlänge $\\lambda$ der betrachten Linie verwenden wir den in Part 2 bestimmten Wert von $\\lambda=$ $(643,842 \\pm 0,007) \\mathrm{nm}$.\nWir kennen nun die Wellenlänge des Zeemanshift für jede von uns betrachtete Linie. Mit dem Zusammenhang zwischen Wellenlänge und Energie $E=\\frac{h c}{\\lambda}$ lässt sich nun die Energieverschiebung der Linine bestimmen. Wir nehmen an, dass die Wellenlängenverschiebung $\\delta \\lambda$ klein gegenüber der absoluten Wellenlänge $\\lambda$ ist, und erhalten daher für die Energieverschiebung $\\delta E$ in guter Näherung:\n\n$$\\n\\delta E=\\frac{h c}{\\lambda^{2}} \\delta \\lambda\n$$"
|
| 119 |
-
|
|
|
|
| 120 |
{
|
| 121 |
"topic": "Bestimmung des Bohrschen Magnetons aus experimentellen Daten",
|
| 122 |
-
"text": "Abschließend nehmen wir den Durchschnitt aller Werte $\\delta E$ für eine Stromstärke $I$.\n\n### 3.2 Bestimmen des Bohrschen Magnetons $\\mu_{B}$ \n\nFür die Energieverschiebung beim Zeemaneffekt gilt:\n\n$$\n\\delta E=\\mu_{B} \\cdot m_{l} \\cdot B\n$$\n\nDa es sich bei der betrachteten Cadmiumlinie um einen ${ }^{1} D_{2} \\rightarrow{ }^{1} P_{1}$ Übergang handelt gilt hier $m_{l}= \\pm 1$. Somit folgt für das Bohrsche Magneton $\\mu_{B}$ als Funktion des Spulenstroms $I$ :\n\n$$\n\\mu_{B}(I)=\\frac{\\delta E(I)}{B(I)}\n$$\n\nDie Magnetfeldstärke $B(I)$ wurde hier anhand der Messwerte aus Teil 1 des Experiments bestimmt.\nWir erhalten für jeden Spulenstrom $I$ einen experimentell bestimmten Wert des Bohrschen Magnetons $\\mu_{B}$. Unsere Ergebnisse sind in Figure 6 graphisch dargestellt."
|
| 123 |
-
|
|
|
|
| 124 |
{
|
| 125 |
"topic": "Vergleich des experimentellen Werts mit dem Literaturwert",
|
| 126 |
-
"text": "Für den experimentellen Mittelwert erhalten wir:\n\n$$\n\\mu_{B, \\exp }=(10,1 \\pm 0.8) \\cdot 10^{-24} \\frac{J}{T}\n$$\n\nDer Literaturwert beträgt:\n\n$$\n\\mu_{B, l i t}=9,27400949 \\cdot 10^{-24} \\frac{J}{T}\n$$\n\nUnsere experimentell ermittelte Wert weicht also um 1,2 Sigma vom Literaturwert ab. Die Abweichung ist folglich nicht signifikant."
|
| 127 |
-
|
|
|
|
| 128 |
{
|
| 129 |
"topic": "Kritische Betrachtung der Ergebnisse und Fehlerquellen",
|
| 130 |
-
"text": "Erfreulicherweise scheint unsere experimentelle Methode keine signifikante Abweichung zwischen Literaturwert und experimentellem Wert des Bohrschen Magnetons zu ergeben. Wir befinden uns mit unserem Wert im niedirgen 2-SigmaIntervall. Dennoch ist kritisch anzumerken, dass wir einen vergleichsweise großen realtiven Fehler auf unser Messergebnis von $7,1 \\%$ erhalten. Das bedeutet, unsere Abweichung ist zwar nicht sigifikant, dennoch weicht unser experimenteller Wert um knapp $10 \\%$ vom Literaturwert ab. Der verwendete experimentelle Aufbau ist folglich nur bedingt für eine exakte Bestimmung des Bohrschen Magnetons geeigent.\n\nDie beiden dominierenden Fehlerquellen sind zum einen die Bestimmung des Magnetfeldes B am Ort der Cadmium Lampe (Inhomogenitäten, exakte Platzierung der Lampe) und zum anderen die Wahl der Fehler der Positionen der $\\pi$ - und $\\sigma$-Linien im Spektrum.\nZum Vergleich: Legt man den Fehler prinzipiell für alle Linien auf $1 p x$, also die maximale Auflösung der Kamera, fest und verzichtet auf eine Skalierung der Fehler, beträgt die Abweichung des exp. Werts zum Literaturwert schon 2,8 Sigma. Wählt man analog für den Fehler der Linien $2 p x$, da beispielsweise ein Maximum auch exakt zwischen zwei Pixelreihen liegen kann, liegt die Abweichung bei 1,4 Sigma."
|
| 131 |
-
|
|
|
|
| 132 |
{
|
| 133 |
"topic": "Quantitative Spektrumsbetrachtung und Wellenlängenbestimmung der Cd-Linie",
|
| 134 |
-
"text": "Zunächst wird der Untergrund von den Messdaten abgezogen, um Störungen durch Rauschen oder Sondereffekte wie kosmische Strahlung oder Umgebungsquellen zu eliminieren. Sollten sich in den Spektren negative Werte befinden, ist dies auf zufällige Unterschiede im Rauschen zurückzuführen. Anhand bekannter Linien des Neonspektrums werden den Pixeln nun Wellenlängen zugeordnet. Hierfür wurde der Bereich des Neonspektrums aufgenommen, in dem sich auch die rote Linie des Cadmiumspektrums befindet. In 7 sieht man das Neonspektrum und die Peaks, an die jeweils ein Voigt-Profil gelegt wurde. Jetzt kann man den identifizierten Linien ihre jeweilige Wellenlänge zuordnen und einen polynomiellen Zusammenhang finden. Wir haben uns für eine Gerade entschieden, die wie in Figure 8 zu sehen gut zu den Daten passt.\nSchließlich wird ein Voigt-Profil an die gemessene rote Cd-Linie gelegt, wie in Figure 9 gezeigt. Umrechnung anhand der Kalibrierung führt auf einen Wert von $\\lambda_{C d}=(643,842 \\pm 0,007) \\mathrm{nm}$. Dies befindet sich im $1 \\sigma$-Bereich des Literaturwertes von $\\lambda_{L i t}=643,84695 \\mathrm{~nm}$. Der Fehler ist Ergebnis der Gauß'schen Fehlerfortpflanzung."
|
| 135 |
-
|
|
|
|
| 136 |
{
|
| 137 |
"topic": "Kritische Betrachtung der Genauigkeit und systematischer Fehler",
|
| 138 |
-
"text": "Messwert und theoretische Vorhersage für die bestimmte Linie stimmen innerhalb statistischer Schwankungen überein. Dies ist umso interessanter, wenn man die Unsicherheit des Messergebnisses betrachtet, die kleiner als 0,002\\% ist. Der absolute Fehler ist, wenn man die Steigung der Kalibrationsgeraden betrachtet, kleiner als 1px. Er besteht ausschließlich aus Abweichungen der numerischen Fits. Berücksichtigt man Ungenauigkeiten des CMOS Sensors oder die Möglichkeit, dass je nach Lage des Messwerts auch eine Abweichung um weniger als 1px eine größere Messwertschwankung verursachen kann, da die Pixel nur diskrete Werte messen können, liegt eine nachträgliche Anpassung nahe. Skaliert man die Unsicherheit auf 1px, liegt der Fehler des Messwerts bei $0,012 \\mathrm{~nm}$. Damit ist der relative Fehler weiterhin kleiner $0,005 \\%$.\n\nZur hohen Genauigkeit trägt vor allem das gute Messverfahren bei. Spektrometer und Datenaufnahme per Computer lassen wenig Raum für Abweichungen. Wie die Daten zeigen, haben wir dabei eine Quelle für einen möglichen großen systematischen Fehler umgangen: Die Kamera wurde auf das Spektrometer nur locker aufgesteckt. Hätte sich deren Position zwischen Neon- und Cadmiummessung z.B. durch Erschütterung des Labortisches verändert, hätte die Energiekalibrierung nicht mehr zur Messung der Cadmiumlinie gepasst."
|
| 139 |
-
|
|
|
|
| 140 |
{
|
| 141 |
"topic": "Unerwartetes Verhalten durch mögliche Restmagnetisierung",
|
| 142 |
-
"text": "Abbildung 6 zeigt unerwartetes Verhalten. Obwohl der Magnet ausgeschaltet war, sind drei Maxima zu sehen, deren Flanken sehr steil abfallen. Vergleicht man mit den Messungen im Magnetfeld, ähneln sich die Strukturen. Möglich ist, dass die Eisenkernspule, in der sich die Lampe während der Messung befand eine Restmagnetisierung aufwies, die eine Aufspaltung herbeigeführt hat."
|
| 143 |
-
|
|
|
|
| 144 |
];
|
| 145 |
|
| 146 |
setDocumentData({
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
pages: ocrData.pages,
|
| 151 |
-
totalPages: ocrData.total_pages,
|
| 152 |
-
chunks: hardcodedChunks // Use hardcoded chunks instead of OCR chunks
|
| 153 |
});
|
| 154 |
|
| 155 |
} catch (error) {
|
|
@@ -165,11 +132,8 @@ export const useDocumentProcessor = () => {
|
|
| 165 |
selectedFile,
|
| 166 |
processing,
|
| 167 |
uploadProgress,
|
| 168 |
-
ocrProgress,
|
| 169 |
documentData,
|
| 170 |
-
imageCache,
|
| 171 |
handleFileChange,
|
| 172 |
-
fetchImage,
|
| 173 |
processDocument,
|
| 174 |
setSelectedFile
|
| 175 |
};
|
|
|
|
| 1 |
+
import { useState, useRef } from 'react';
|
| 2 |
|
| 3 |
export const useDocumentProcessor = () => {
|
| 4 |
const fileInputRef = useRef(null);
|
| 5 |
const [selectedFile, setSelectedFile] = useState(null);
|
| 6 |
const [processing, setProcessing] = useState(false);
|
| 7 |
const [uploadProgress, setUploadProgress] = useState(0);
|
|
|
|
| 8 |
const [documentData, setDocumentData] = useState(null);
|
|
|
|
|
|
|
| 9 |
|
| 10 |
const handleFileChange = (e) => {
|
| 11 |
setSelectedFile(e.target.files[0]);
|
| 12 |
setDocumentData(null);
|
| 13 |
setUploadProgress(0);
|
|
|
|
|
|
|
|
|
|
| 14 |
};
|
| 15 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
const processDocument = async () => {
|
| 17 |
if (!selectedFile) return;
|
| 18 |
|
| 19 |
setProcessing(true);
|
| 20 |
setUploadProgress(0);
|
|
|
|
| 21 |
|
| 22 |
try {
|
| 23 |
// Step 1: Upload PDF
|
|
|
|
| 31 |
});
|
| 32 |
|
| 33 |
if (!uploadResponse.ok) {
|
| 34 |
+
const errorText = await uploadResponse.text();
|
| 35 |
+
console.error('Upload failed:', uploadResponse.status, errorText);
|
| 36 |
throw new Error('Failed to upload PDF');
|
| 37 |
}
|
| 38 |
|
| 39 |
+
const responseText = await uploadResponse.text();
|
| 40 |
+
console.log('Raw response:', responseText);
|
| 41 |
+
const uploadData = JSON.parse(responseText);
|
| 42 |
setUploadProgress(100);
|
| 43 |
|
| 44 |
+
// Brief pause to show completion
|
| 45 |
+
await new Promise(resolve => setTimeout(resolve, 200));
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
|
| 47 |
+
// Use hardcoded chunks for the document
|
| 48 |
const hardcodedChunks = [
|
| 49 |
+
{
|
| 50 |
"topic": "Magnetfeldmessung und Hysterese-Analyse",
|
| 51 |
+
"text": "Zu Beginn des Versuchs haben wir mit Hilfe des Teslameters die Magnetfeldstärke B an der Position der Cd-Lampe bei verschiedenen Spulenströmen gemessen. (siehe Messwerte in Tabelle 1 im Laborbuch). In Figure 1 sind die gemessenen Feldstärken als Funktion der Stromstärke aufgetragen.\nAnhand der Fehlerbalken und der praktisch identischen Überlagerung der beiden linearen Fitgeraden für auf- und absteigende Stromstärken, wird deutlich, dass keine Hystereseeffekte vorliegen. Der lineare Fit wurde hierbei nur auf die Stromstärken bis einschl. 10A angewandt, da für größere Stromstärken das Magnetfeld nicht in direktem proportionalen Zusammenhang ansteigt. Dies ist mit Sättigungseffekten der Magnetisierung des Eisenkerns der verwendeten Spule zu erklären.",
|
| 52 |
+
"page": 1
|
| 53 |
+
},
|
| 54 |
{
|
| 55 |
"topic": "Qualitative Beobachtung des Zeeman-Effekts",
|
| 56 |
+
"text": "Mit Hilfe der CMOS Kamera wurde das Spektrum des emittierten Lichts der Cadmiumlampe unter Verwendung des Lummer Gehercke Interferometers beobachtet. Die Beobachtungen wurden in longitudinaler und transversaler Richtung zum Magnetfeld durchgeführt.",
|
| 57 |
+
"page": 2
|
| 58 |
+
},
|
| 59 |
{
|
| 60 |
"topic": "Zeeman-Effekt: Longitudinale Richtung mit Filtern",
|
| 61 |
+
"text": "## ohne Filter:\n\nEs sind deutlich zwei Linien pro Ordnung zu erkennen. Dies sind die $\\sigma^{+}$und $\\sigma^{-}$Linien. Die $\\pi$ Linie ist in longitudinaler Richtung nicht zu beobachten\n\n## mit $\\lambda / 4$-Plättchen und Polarisationsfilter:\n\nVon der Cadmiumlampe aus betrachtet wird zuerst ein $\\lambda / 4$-Plättchen und danach ein Polarisationsfilter in den Strahlengang gebracht. Je nach Ausrichtung der Filter zueinander wird nun eine der beiden Linien ausgeblendet.\n\n$$\n-45^{\\circ} \\text { Winkel: }\n$$\n\nStehen $\\lambda / 4$-Plättchen und Polarisationsfilter zueinander im $-45^{\\circ}$ Winkel, wird das zirkular polarisierte Licht der $\\sigma^{-}$Linie um $45^{\\circ}$ verschoben linear polarisiert und somit vom Polarisationsfilter abgeschirmt. Folglich ist in dieser Konstellation nur die linke der beiden $\\sigma$ Linien zu beobachten.\n\n$$\n+45^{\\circ} \\text { Winkel: }\n$$\n\nStehen $\\lambda / 4$-Plättchen und Polarisationsfilter zueinander im $+45^{\\circ}$ Winkel, ist nach analogem Prinzip wie zuvor nur die rechte Linie auf dem Kamerabild zu beobachten.",
|
| 62 |
+
"page": 2
|
| 63 |
+
},
|
| 64 |
{
|
| 65 |
"topic": "Zeeman-Effekt: Transversale Richtung und Polarisation",
|
| 66 |
+
"text": "## ohne Filter:\n\nEs sind deutlich drei Linien pro Ordnung zu erkennen. Dies sind die $\\sigma^{+}, \\pi$ und $\\sigma^{-}$Linien.\n\n## mit Polarisationsfilter horizontal (in B-Feld Richtung):\n\nDie beiden $\\sigma$-Linien sind vollständig ausgeblendet. Die $\\pi-$ Linie ist deutlich sichtbar.\nmit Polarisationsfilter vertikal $\\left(90^{\\circ}\\right.$ zu B-Feld Richtung):\nDie beiden $\\sigma$-Linien sind klar sichtbar. Die $\\pi$-Linie ist ausgeblendet.\n\nWie in Figure 3 gut zu erkennen ist, sind die ausgeblendeten Linien in beiden Konfigurationen weiterhin leicht sichtbar. Dies ist auf das nicht perfekt homogene Magnetfeld am Ort der Ca-Lampe zurückzuführen. Das Licht ist also nicht perfekt zirkular bzw. in B-Feld Richtung polarisiert, weshalb ein vollständiges Ausblenden im Experiment nicht zu beobachten ist.",
|
| 67 |
+
"page": 3
|
| 68 |
+
},
|
| 69 |
{
|
| 70 |
"topic": "Bestimmung des Zeemanshifts und Datenaufbereitung",
|
| 71 |
+
"text": "Die Messdaten bei verschiedene Stromstärken wurden jeweils in einem Plot dargestellt. Um für den Fit möglichst saubere Messkurven des Spektrums zu verwenden, wurde die Messreihe bei $I=8 A$ nicht in die Datenauswertung einbezogen, da die Aufspaltung der Cadmiumlinie nur schwer zu beobachten war. Das gleich gilt für die 8. Interferenzodnung, die nicht berücksichtigt wurde. Für die Datenauswertung fließen also die Nullte bis 7. Ordnung jeweils bei 9 bis 13 Ampere ein.\nAls Funktion um die Messdaten zu fitten wurde ein Pseudo-Voigt-Profil verwendet. Die drei Kurven einer Ordnung wurden hierbei gemeinsam mit der Summe dreier Pseudo-Voigt-Profile gefittet. In Figure 4 sind exemplarisch anhand der Daten für $I=12 A$ die Messdaten und der abschnittsweise Fit zu erkennen.",
|
| 72 |
+
"page": 4
|
| 73 |
+
},
|
| 74 |
{
|
| 75 |
"topic": "Fehleranalyse der Fitparameter und Verzerrungseffekte",
|
| 76 |
+
"text": "Anhand der Fitparameter wird die Position der $\\sigma$ und $\\pi$ Linien bestimmt. Die Fehler der Fitparameter sind extrem klein $(\\approx 0,1 p x)$ und eigenen sich nicht als realistische Fehler für unsere weitere Rechnung. Als minimalen Fehler nehmen wir daher die Auflösung der Kamera an ( $1 p x$ ) und skalieren alle Fehler so, dass der kleineste Fehler exakt $1 p x$ beträgt. Die anderen Fehler sind dann entsprechend linear skaliert größer. Dies berücksichtigt die unterschiedliche Qualität der Fits auf unterschiedliche Interferenz-Ordnungen, bringt die Fehler aber in einen experimentell realistischen Bereich.\nFür die Berechnung des Zeemanshifts müssen die Verzerrungseffekte der Lummer-Gehrcke-Platte beachtet werden. Hierfür wird die Position der $\\pi$-Linien gegen der Interferenzordnung $k$ der entsprechenden Linie aufgetragen. Der funktionelle Zusammenhang dieser beiden Größen wird durch eine quadratische Funktion $k=f(a)$ approximiert: \n\n $k=f(a)=b a^{2}+c a+d$",
|
| 77 |
+
"page": 4
|
| 78 |
+
},
|
| 79 |
{
|
| 80 |
"topic": "Berechnung der Wellenlängen- und Energieverschiebung",
|
| 81 |
+
"text": "Die Differenz zur ganzzahligen Ordnung der zugehörigen $\\pi$-Linie ergibt $\\delta k$. Für eine (kleine) Wellenlängenverschiebung $\\delta \\lambda$ gilt:\n\n$$\n\\delta \\lambda=\\frac{\\delta k}{\\Delta k} \\cdot \\frac{\\lambda^{2}}{2 d \\cdot \\sqrt{n^{2}-1}}\n$$\n\nFür den Abstand $\\Delta k$ zweier Ordnungen gilt $\\Delta k=1$. Für die Wellenlänge $\\lambda$ der betrachten Linie verwenden wir den in Part 2 bestimmten Wert von $\\lambda=$ $(643,842 \\pm 0,007) \\mathrm{nm}$.\nWir kennen nun die Wellenlänge des Zeemanshift für jede von uns betrachtete Linie. Mit dem Zusammenhang zwischen Wellenlänge und Energie $E=\\frac{h c}{\\lambda}$ lässt sich nun die Energieverschiebung der Linine bestimmen. Wir nehmen an, dass die Wellenlängenverschiebung $\\delta \\lambda$ klein gegenüber der absoluten Wellenlänge $\\lambda$ ist, und erhalten daher für die Energieverschiebung $\\delta E$ in guter Näherung:\n\n$$\n\\delta E=\\frac{h c}{\\lambda^{2}} \\delta \\lambda\n$$",
|
| 82 |
+
"page": 5
|
| 83 |
+
},
|
| 84 |
{
|
| 85 |
"topic": "Bestimmung des Bohrschen Magnetons aus experimentellen Daten",
|
| 86 |
+
"text": "Abschließend nehmen wir den Durchschnitt aller Werte $\\delta E$ für eine Stromstärke $I$.\n\n### 3.2 Bestimmen des Bohrschen Magnetons $\\mu_{B}$ \n\nFür die Energieverschiebung beim Zeemaneffekt gilt:\n\n$$\n\\delta E=\\mu_{B} \\cdot m_{l} \\cdot B\n$$\n\nDa es sich bei der betrachteten Cadmiumlinie um einen ${ }^{1} D_{2} \\rightarrow{ }^{1} P_{1}$ Übergang handelt gilt hier $m_{l}= \\pm 1$. Somit folgt für das Bohrsche Magneton $\\mu_{B}$ als Funktion des Spulenstroms $I$ :\n\n$$\n\\mu_{B}(I)=\\frac{\\delta E(I)}{B(I)}\n$$\n\nDie Magnetfeldstärke $B(I)$ wurde hier anhand der Messwerte aus Teil 1 des Experiments bestimmt.\nWir erhalten für jeden Spulenstrom $I$ einen experimentell bestimmten Wert des Bohrschen Magnetons $\\mu_{B}$. Unsere Ergebnisse sind in Figure 6 graphisch dargestellt.",
|
| 87 |
+
"page": 6
|
| 88 |
+
},
|
| 89 |
{
|
| 90 |
"topic": "Vergleich des experimentellen Werts mit dem Literaturwert",
|
| 91 |
+
"text": "Für den experimentellen Mittelwert erhalten wir:\n\n$$\n\\mu_{B, \\exp }=(10,1 \\pm 0.8) \\cdot 10^{-24} \\frac{J}{T}\n$$\n\nDer Literaturwert beträgt:\n\n$$\n\\mu_{B, l i t}=9,27400949 \\cdot 10^{-24} \\frac{J}{T}\n$$\n\nUnsere experimentell ermittelte Wert weicht also um 1,2 Sigma vom Literaturwert ab. Die Abweichung ist folglich nicht signifikant.",
|
| 92 |
+
"page": 6
|
| 93 |
+
},
|
| 94 |
{
|
| 95 |
"topic": "Kritische Betrachtung der Ergebnisse und Fehlerquellen",
|
| 96 |
+
"text": "Erfreulicherweise scheint unsere experimentelle Methode keine signifikante Abweichung zwischen Literaturwert und experimentellem Wert des Bohrschen Magnetons zu ergeben. Wir befinden uns mit unserem Wert im niedirgen 2-SigmaIntervall. Dennoch ist kritisch anzumerken, dass wir einen vergleichsweise großen realtiven Fehler auf unser Messergebnis von $7,1 \\%$ erhalten. Das bedeutet, unsere Abweichung ist zwar nicht sigifikant, dennoch weicht unser experimenteller Wert um knapp $10 \\%$ vom Literaturwert ab. Der verwendete experimentelle Aufbau ist folglich nur bedingt für eine exakte Bestimmung des Bohrschen Magnetons geeigent.\n\nDie beiden dominierenden Fehlerquellen sind zum einen die Bestimmung des Magnetfeldes B am Ort der Cadmium Lampe (Inhomogenitäten, exakte Platzierung der Lampe) und zum anderen die Wahl der Fehler der Positionen der $\\pi$ - und $\\sigma$-Linien im Spektrum.\nZum Vergleich: Legt man den Fehler prinzipiell für alle Linien auf $1 p x$, also die maximale Auflösung der Kamera, fest und verzichtet auf eine Skalierung der Fehler, beträgt die Abweichung des exp. Werts zum Literaturwert schon 2,8 Sigma. Wählt man analog für den Fehler der Linien $2 p x$, da beispielsweise ein Maximum auch exakt zwischen zwei Pixelreihen liegen kann, liegt die Abweichung bei 1,4 Sigma.",
|
| 97 |
+
"page": 7
|
| 98 |
+
},
|
| 99 |
{
|
| 100 |
"topic": "Quantitative Spektrumsbetrachtung und Wellenlängenbestimmung der Cd-Linie",
|
| 101 |
+
"text": "Zunächst wird der Untergrund von den Messdaten abgezogen, um Störungen durch Rauschen oder Sondereffekte wie kosmische Strahlung oder Umgebungsquellen zu eliminieren. Sollten sich in den Spektren negative Werte befinden, ist dies auf zufällige Unterschiede im Rauschen zurückzuführen. Anhand bekannter Linien des Neonspektrums werden den Pixeln nun Wellenlängen zugeordnet. Hierfür wurde der Bereich des Neonspektrums aufgenommen, in dem sich auch die rote Linie des Cadmiumspektrums befindet. In 7 sieht man das Neonspektrum und die Peaks, an die jeweils ein Voigt-Profil gelegt wurde. Jetzt kann man den identifizierten Linien ihre jeweilige Wellenlänge zuordnen und einen polynomiellen Zusammenhang finden. Wir haben uns für eine Gerade entschieden, die wie in Figure 8 zu sehen gut zu den Daten passt.\nSchließlich wird ein Voigt-Profil an die gemessene rote Cd-Linie gelegt, wie in Figure 9 gezeigt. Umrechnung anhand der Kalibrierung führt auf einen Wert von $\\lambda_{C d}=(643,842 \\pm 0,007) \\mathrm{nm}$. Dies befindet sich im $1 \\sigma$-Bereich des Literaturwertes von $\\lambda_{L i t}=643,84695 \\mathrm{~nm}$. Der Fehler ist Ergebnis der Gauß'schen Fehlerfortpflanzung.",
|
| 102 |
+
"page": 8
|
| 103 |
+
},
|
| 104 |
{
|
| 105 |
"topic": "Kritische Betrachtung der Genauigkeit und systematischer Fehler",
|
| 106 |
+
"text": "Messwert und theoretische Vorhersage für die bestimmte Linie stimmen innerhalb statistischer Schwankungen überein. Dies ist umso interessanter, wenn man die Unsicherheit des Messergebnisses betrachtet, die kleiner als 0,002\\% ist. Der absolute Fehler ist, wenn man die Steigung der Kalibrationsgeraden betrachtet, kleiner als 1px. Er besteht ausschließlich aus Abweichungen der numerischen Fits. Berücksichtigt man Ungenauigkeiten des CMOS Sensors oder die Möglichkeit, dass je nach Lage des Messwerts auch eine Abweichung um weniger als 1px eine größere Messwertschwankung verursachen kann, da die Pixel nur diskrete Werte messen können, liegt eine nachträgliche Anpassung nahe. Skaliert man die Unsicherheit auf 1px, liegt der Fehler des Messwerts bei $0,012 \\mathrm{~nm}$. Damit ist der relative Fehler weiterhin kleiner $0,005 \\%$.\n\nZur hohen Genauigkeit trägt vor allem das gute Messverfahren bei. Spektrometer und Datenaufnahme per Computer lassen wenig Raum für Abweichungen. Wie die Daten zeigen, haben wir dabei eine Quelle für einen möglichen großen systematischen Fehler umgangen: Die Kamera wurde auf das Spektrometer nur locker aufgesteckt. Hätte sich deren Position zwischen Neon- und Cadmiummessung z.B. durch Erschütterung des Labortisches verändert, hätte die Energiekalibrierung nicht mehr zur Messung der Cadmiumlinie gepasst.",
|
| 107 |
+
"page": 9
|
| 108 |
+
},
|
| 109 |
{
|
| 110 |
"topic": "Unerwartetes Verhalten durch mögliche Restmagnetisierung",
|
| 111 |
+
"text": "Abbildung 6 zeigt unerwartetes Verhalten. Obwohl der Magnet ausgeschaltet war, sind drei Maxima zu sehen, deren Flanken sehr steil abfallen. Vergleicht man mit den Messungen im Magnetfeld, ähneln sich die Strukturen. Möglich ist, dass die Eisenkernspule, in der sich die Lampe während der Messung befand eine Restmagnetisierung aufwies, die eine Aufspaltung herbeigeführt hat.",
|
| 112 |
+
"page": 9
|
| 113 |
+
}
|
| 114 |
];
|
| 115 |
|
| 116 |
setDocumentData({
|
| 117 |
+
filename: uploadData.filename || selectedFile.name,
|
| 118 |
+
filePath: uploadData.file_path,
|
| 119 |
+
chunks: hardcodedChunks
|
|
|
|
|
|
|
|
|
|
| 120 |
});
|
| 121 |
|
| 122 |
} catch (error) {
|
|
|
|
| 132 |
selectedFile,
|
| 133 |
processing,
|
| 134 |
uploadProgress,
|
|
|
|
| 135 |
documentData,
|
|
|
|
| 136 |
handleFileChange,
|
|
|
|
| 137 |
processDocument,
|
| 138 |
setSelectedFile
|
| 139 |
};
|
frontend/src/lib/utils.js
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { clsx } from "clsx";
|
| 2 |
+
import { twMerge } from "tailwind-merge"
|
| 3 |
+
|
| 4 |
+
/**
 * Combine any number of class-name inputs into one className string.
 *
 * All arguments are collected into a single array and passed to `clsx`;
 * the string it produces is then passed through `twMerge`.
 * NOTE(review): presumably `twMerge` resolves conflicting Tailwind utility
 * classes — confirm against the tailwind-merge documentation.
 *
 * @param {...any} inputs - Class values in any shape `clsx` accepts.
 * @returns {string} The merged className string.
 */
export function cn(...inputs) {
  const joined = clsx(inputs);
  return twMerge(joined);
}
|
frontend/src/utils/markdownComponents.jsx
CHANGED
|
@@ -1,84 +1,8 @@
|
|
| 1 |
-
import ImageComponent from '../components/ImageComponent';
|
| 2 |
-
|
| 3 |
-
export const getDocumentMarkdownComponents = (documentData, fetchImage, imageCache, setImageCache) => ({
|
| 4 |
-
h1: ({ children }) => <h1 style={{ fontSize: '1.5rem', fontWeight: 'bold', marginBottom: '1rem', color: '#1a202c' }}>{children}</h1>,
|
| 5 |
-
h2: ({ children }) => <h2 style={{ fontSize: '1.25rem', fontWeight: 'bold', marginBottom: '0.75rem', marginTop: '1.5rem', color: '#1a202c' }}>{children}</h2>,
|
| 6 |
-
h3: ({ children }) => <h3 style={{ fontSize: '1.125rem', fontWeight: 'bold', marginBottom: '0.5rem', marginTop: '1rem', color: '#1a202c' }}>{children}</h3>,
|
| 7 |
-
p: ({ children }) => <p style={{ marginBottom: '0.75rem', color: '#374151', lineHeight: '1.5', fontSize: '0.875rem' }}>{children}</p>,
|
| 8 |
-
hr: () => <hr style={{ margin: '1.5rem 0', borderColor: '#d1d5db' }} />,
|
| 9 |
-
ul: ({ children }) => <ul style={{ marginBottom: '0.75rem', marginLeft: '1.25rem', listStyleType: 'disc', fontSize: '0.875rem' }}>{children}</ul>,
|
| 10 |
-
ol: ({ children }) => <ol style={{ marginBottom: '0.75rem', marginLeft: '1.25rem', listStyleType: 'decimal', fontSize: '0.875rem' }}>{children}</ol>,
|
| 11 |
-
li: ({ children }) => <li style={{ marginBottom: '0.125rem', color: '#374151' }}>{children}</li>,
|
| 12 |
-
blockquote: ({ children }) => (
|
| 13 |
-
<blockquote style={{ borderLeft: '3px solid #3b82f6', paddingLeft: '0.75rem', fontStyle: 'italic', margin: '0.75rem 0', color: '#6b7280', fontSize: '0.875rem' }}>
|
| 14 |
-
{children}
|
| 15 |
-
</blockquote>
|
| 16 |
-
),
|
| 17 |
-
code: ({ inline, children }) =>
|
| 18 |
-
inline ?
|
| 19 |
-
<code style={{ backgroundColor: '#f3f4f6', padding: '0.125rem 0.25rem', borderRadius: '0.25rem', fontSize: '0.75rem', fontFamily: 'monospace' }}>{children}</code> :
|
| 20 |
-
<pre style={{ backgroundColor: '#f3f4f6', padding: '0.75rem', borderRadius: '0.375rem', overflowX: 'auto', margin: '0.75rem 0' }}>
|
| 21 |
-
<code style={{ fontSize: '0.75rem', fontFamily: 'monospace' }}>{children}</code>
|
| 22 |
-
</pre>,
|
| 23 |
-
div: ({ children, style }) => (
|
| 24 |
-
<div style={style}>
|
| 25 |
-
{children}
|
| 26 |
-
</div>
|
| 27 |
-
),
|
| 28 |
-
img: ({ src, alt }) => (
|
| 29 |
-
<ImageComponent
|
| 30 |
-
src={src}
|
| 31 |
-
alt={alt}
|
| 32 |
-
fileId={documentData?.fileId}
|
| 33 |
-
imageCache={imageCache}
|
| 34 |
-
onImageCached={(imageId, imageData) => {
|
| 35 |
-
setImageCache(prev => ({
|
| 36 |
-
...prev,
|
| 37 |
-
[imageId]: imageData
|
| 38 |
-
}));
|
| 39 |
-
}}
|
| 40 |
-
/>
|
| 41 |
-
)
|
| 42 |
-
});
|
| 43 |
-
|
| 44 |
-
export const getChunkMarkdownComponents = (documentData, fetchImage, imageCache, setImageCache) => ({
|
| 45 |
-
h1: ({ children }) => <h1 style={{ fontSize: '1.25rem', fontWeight: 'bold', marginBottom: '0.75rem', color: '#1a202c' }}>{children}</h1>,
|
| 46 |
-
h2: ({ children }) => <h2 style={{ fontSize: '1.125rem', fontWeight: 'bold', marginBottom: '0.5rem', marginTop: '1rem', color: '#1a202c' }}>{children}</h2>,
|
| 47 |
-
h3: ({ children }) => <h3 style={{ fontSize: '1rem', fontWeight: 'bold', marginBottom: '0.5rem', marginTop: '0.75rem', color: '#1a202c' }}>{children}</h3>,
|
| 48 |
-
p: ({ children }) => <p style={{ marginBottom: '0.5rem', color: '#374151', lineHeight: '1.4', fontSize: '0.875rem' }}>{children}</p>,
|
| 49 |
-
hr: () => <hr style={{ margin: '1rem 0', borderColor: '#d1d5db' }} />,
|
| 50 |
-
ul: ({ children }) => <ul style={{ marginBottom: '0.5rem', marginLeft: '1rem', listStyleType: 'disc', fontSize: '0.875rem' }}>{children}</ul>,
|
| 51 |
-
ol: ({ children }) => <ol style={{ marginBottom: '0.5rem', marginLeft: '1rem', listStyleType: 'decimal', fontSize: '0.875rem' }}>{children}</ol>,
|
| 52 |
-
li: ({ children }) => <li style={{ marginBottom: '0.125rem', color: '#374151' }}>{children}</li>,
|
| 53 |
-
blockquote: ({ children }) => (
|
| 54 |
-
<blockquote style={{ borderLeft: '2px solid #9ca3af', paddingLeft: '0.5rem', fontStyle: 'italic', margin: '0.5rem 0', color: '#6b7280', fontSize: '0.875rem' }}>
|
| 55 |
-
{children}
|
| 56 |
-
</blockquote>
|
| 57 |
-
),
|
| 58 |
-
code: ({ inline, children }) =>
|
| 59 |
-
inline ?
|
| 60 |
-
<code style={{ backgroundColor: '#f3f4f6', padding: '0.125rem 0.25rem', borderRadius: '0.25rem', fontSize: '0.75rem', fontFamily: 'monospace' }}>{children}</code> :
|
| 61 |
-
<pre style={{ backgroundColor: '#f3f4f6', padding: '0.5rem', borderRadius: '0.25rem', overflowX: 'auto', margin: '0.5rem 0' }}>
|
| 62 |
-
<code style={{ fontSize: '0.75rem', fontFamily: 'monospace' }}>{children}</code>
|
| 63 |
-
</pre>,
|
| 64 |
-
img: ({ src, alt }) => (
|
| 65 |
-
<ImageComponent
|
| 66 |
-
src={src}
|
| 67 |
-
alt={alt}
|
| 68 |
-
fileId={documentData?.fileId}
|
| 69 |
-
imageCache={imageCache}
|
| 70 |
-
onImageCached={(imageId, imageData) => {
|
| 71 |
-
setImageCache(prev => ({
|
| 72 |
-
...prev,
|
| 73 |
-
[imageId]: imageData
|
| 74 |
-
}));
|
| 75 |
-
}}
|
| 76 |
-
/>
|
| 77 |
-
)
|
| 78 |
-
});
|
| 79 |
-
|
| 80 |
export const getChatMarkdownComponents = () => ({
|
| 81 |
p: ({ children }) => <p className="mb-2 text-gray-800 leading-relaxed">{children}</p>,
|
|
|
|
|
|
|
|
|
|
| 82 |
ul: ({ children }) => <ul className="mb-2 ml-4 list-disc">{children}</ul>,
|
| 83 |
ol: ({ children }) => <ol className="mb-2 ml-4 list-decimal">{children}</ol>,
|
| 84 |
li: ({ children }) => <li className="mb-1 text-gray-800">{children}</li>,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
export const getChatMarkdownComponents = () => ({
|
| 2 |
p: ({ children }) => <p className="mb-2 text-gray-800 leading-relaxed">{children}</p>,
|
| 3 |
+
h1: ({ children }) => <h1 className="text-xl font-bold mb-3 text-gray-900">{children}</h1>,
|
| 4 |
+
h2: ({ children }) => <h2 className="text-lg font-bold mb-2 text-gray-900">{children}</h2>,
|
| 5 |
+
h3: ({ children }) => <h3 className="text-base font-bold mb-2 text-gray-900">{children}</h3>,
|
| 6 |
ul: ({ children }) => <ul className="mb-2 ml-4 list-disc">{children}</ul>,
|
| 7 |
ol: ({ children }) => <ol className="mb-2 ml-4 list-decimal">{children}</ol>,
|
| 8 |
li: ({ children }) => <li className="mb-1 text-gray-800">{children}</li>,
|
frontend/src/utils/markdownUtils.js
DELETED
|
@@ -1,33 +0,0 @@
|
|
| 1 |
-
/**
 * Wrap the currently selected chunk of a markdown document in a blockquote
 * headed "Current Learning Section".
 *
 * The chunk is located by its `start_position` / `end_position` character
 * offsets into `markdown`. Every line of the chunk is prefixed with `> `
 * (blank lines become a bare `>`), so the markdown structure inside the
 * chunk is preserved while it is visually set apart.
 *
 * @param {string} markdown - Full markdown text.
 * @param {Array<{start_position: number, end_position: number}>} chunks -
 *   Chunk descriptors with offsets into `markdown`.
 * @param {number} currentChunkIndex - Index of the chunk to highlight.
 * @returns {string} The markdown with the chunk wrapped, or `markdown`
 *   unchanged when the chunk or the markdown itself is missing.
 */
export const highlightChunkInMarkdown = (markdown, chunks, currentChunkIndex) => {
  const chunk = chunks ? chunks[currentChunkIndex] : undefined;
  if (!chunk || !markdown) {
    return markdown;
  }

  const { start_position: start, end_position: end } = chunk;
  const chunkText = markdown.slice(start, end);

  // Debug aid: shows the chunk boundaries plus 20 characters of context on each side.
  console.log('Chunk debugging:', {
    chunkIndex: currentChunkIndex,
    startPos: start,
    endPos: end,
    chunkTextLength: chunkText.length,
    chunkTextPreview: `${chunkText.substring(0, 50)}...`,
    beforeText: markdown.slice(Math.max(0, start - 20), start),
    afterText: markdown.slice(end, end + 20)
  });

  // A blockquote keeps the inner markdown intact; blank lines need a bare '>'
  // so the quote is not split into several blocks.
  const quotedBody = chunkText
    .split('\n')
    .map(line => (line.trim() === '' ? '>' : `> ${line}`))
    .join('\n');

  const before = markdown.slice(0, start);
  const after = markdown.slice(end);

  return `${before}\n\n> **Current Learning Section**\n>\n${quotedBody}\n\n${after}`;
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
frontend/vite.config.js
CHANGED
|
@@ -7,11 +7,7 @@ export default defineConfig({
|
|
| 7 |
server: {
|
| 8 |
proxy: {
|
| 9 |
'/upload_pdf': 'http://localhost:8000',
|
| 10 |
-
'/process_ocr': 'http://localhost:8000',
|
| 11 |
-
'/get_image': 'http://localhost:8000',
|
| 12 |
-
'/chunk_page': 'http://localhost:8000',
|
| 13 |
-
'/start_chunk_lesson': 'http://localhost:8000',
|
| 14 |
'/api': 'http://localhost:8000'
|
| 15 |
}
|
| 16 |
}
|
| 17 |
-
})
|
|
|
|
| 7 |
server: {
|
| 8 |
proxy: {
|
| 9 |
'/upload_pdf': 'http://localhost:8000',
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
'/api': 'http://localhost:8000'
|
| 11 |
}
|
| 12 |
}
|
| 13 |
+
})
|
test_fuzzy_find.py
DELETED
|
@@ -1,194 +0,0 @@
|
|
| 1 |
-
#%%
|
| 2 |
-
import matplotlib.pyplot as plt
|
| 3 |
-
from difflib import SequenceMatcher
|
| 4 |
-
import numpy as np
|
| 5 |
-
|
| 6 |
-
def fuzzy_find(text, pattern, start_pos=0, min_ratio=0.8):
    """Find the best fuzzy match for pattern in text starting from start_pos.

    A window of ``len(pattern)`` characters is slid over ``text``; each
    window is compared case-insensitively with ``pattern`` using
    ``difflib.SequenceMatcher``.

    Args:
        text: String to search in.
        pattern: String to look for.
        start_pos: Index of the first window to consider.
        min_ratio: Similarity a window must strictly exceed to count as a
            match. Defaults to 0.8, the threshold that used to be
            hard-coded.

    Returns:
        The start index of the earliest window with the highest similarity
        above ``min_ratio``, or ``None`` when no window qualifies.
    """
    pattern_len = len(pattern)
    pattern_lower = pattern.lower()  # loop-invariant, so lowered only once
    best_ratio = min_ratio
    best_pos = None

    for i in range(start_pos, len(text) - pattern_len + 1):
        window = text[i:i + pattern_len]
        ratio = SequenceMatcher(None, pattern_lower, window.lower()).ratio()
        if ratio > best_ratio:
            best_ratio = ratio
            best_pos = i
            if ratio >= 1.0:
                # Only a strictly better window could replace this one, and
                # nothing beats a perfect score, so stop scanning.
                break

    return best_pos
|
| 22 |
-
|
| 23 |
-
def analyze_fuzzy_ratios(markdown_text, chunk_text):
    """Score every window of markdown_text against chunk_text.

    A window of ``len(chunk_text)`` characters is slid over the whole of
    ``markdown_text``; each window is compared case-insensitively with
    ``chunk_text`` using ``difflib.SequenceMatcher``.

    Args:
        markdown_text: Text to scan.
        chunk_text: Text each window is compared with.

    Returns:
        A ``(positions, ratios)`` tuple of equally long lists:
        ``positions[k]`` is a window start index and ``ratios[k]`` is the
        similarity of that window. Both lists are empty when ``chunk_text``
        is longer than ``markdown_text``.
    """
    chunk_len = len(chunk_text)
    chunk_lower = chunk_text.lower()  # loop-invariant, so lowered only once

    positions = list(range(len(markdown_text) - chunk_len + 1))
    ratios = [
        SequenceMatcher(
            None, chunk_lower, markdown_text[i:i + chunk_len].lower()
        ).ratio()
        for i in positions
    ]
    return positions, ratios
|
| 40 |
-
|
| 41 |
-
def plot_ratio_distribution(positions, ratios, chunk_text, markdown_file_path=None):
    """Create a plot showing the similarity ratio distribution across positions.

    The figure has two stacked axes: the ratio as a function of position
    (with the 0.8 threshold and the maximum marked) and a 50-bin histogram
    of the ratios.

    Args:
        positions: Window start indices, parallel to ``ratios``.
        ratios: Similarity ratio of each window; must not be empty.
        chunk_text: The chunk that was searched for; only its length is used
            (in the plot title).
        markdown_file_path: Accepted for signature compatibility; not used.

    Returns:
        A ``(fig, max_ratio, max_pos)`` tuple: the figure, the highest ratio,
        and the position of its first occurrence.

    Raises:
        ValueError: If ``ratios`` is empty.
    """
    # Validate and find the maximum before creating the figure, so bad input
    # does not leave an orphaned open figure behind.
    if len(ratios) == 0:
        raise ValueError("ratios must contain at least one value")
    max_ratio = max(ratios)
    max_pos = positions[ratios.index(max_ratio)]

    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(15, 10))

    # Main plot: ratio vs position
    ax1.plot(positions, ratios, 'b-', alpha=0.7, linewidth=1)
    ax1.axhline(y=0.8, color='r', linestyle='--', label='Fuzzy find threshold (0.8)')
    ax1.set_xlabel('Position in Markdown Text')
    ax1.set_ylabel('Similarity Ratio')
    ax1.set_title(f'Fuzzy Match Similarity Ratios Across Text\n(Chunk length: {len(chunk_text)} chars)')
    ax1.grid(True, alpha=0.3)

    # Highlight maximum ratio; the legend is built once, after every labelled
    # artist has been added.
    ax1.plot(max_pos, max_ratio, 'ro', markersize=8, label=f'Max ratio: {max_ratio:.3f} at pos {max_pos}')
    ax1.legend()

    # Histogram of ratios
    ax2.hist(ratios, bins=50, alpha=0.7, edgecolor='black')
    ax2.axvline(x=0.8, color='r', linestyle='--', label='Fuzzy find threshold (0.8)')
    ax2.axvline(x=max_ratio, color='g', linestyle='--', label=f'Maximum ratio: {max_ratio:.3f}')
    ax2.set_xlabel('Similarity Ratio')
    ax2.set_ylabel('Frequency')
    ax2.set_title('Distribution of Similarity Ratios')
    ax2.legend()
    ax2.grid(True, alpha=0.3)

    plt.tight_layout()
    return fig, max_ratio, max_pos
|
| 74 |
-
|
| 75 |
-
def _preview(text, limit):
    """Return the first ``limit`` characters of text, plus '...' if truncated."""
    return text[:limit] + ('...' if len(text) > limit else '')


def _mark_mismatches(subject, other, limit):
    """Render subject[:limit], bracketing characters that differ from other.

    Characters are compared case-insensitively at the same index; a character
    with no counterpart in ``other`` counts as different.
    """
    return "".join(
        char if i < len(other) and char.lower() == other[i].lower() else f"[{char}]"
        for i, char in enumerate(subject[:limit])
    )


def compare_texts(original_chunk, found_text, max_pos):
    """Compare the original chunk text with the text found by fuzzy_find.

    Prints a report to stdout: lengths, overall case-insensitive similarity,
    a 200-character preview of each text, a character-by-character
    comparison of the first 100 characters, and up to ten of the differing
    spans.

    Args:
        original_chunk: The chunk text that was searched for.
        found_text: The text located by the fuzzy search.
        max_pos: Position at which ``found_text`` was found (reported only).

    Returns:
        A ``(similarity, differences)`` tuple. ``similarity`` is the
        case-insensitive ``SequenceMatcher`` ratio. ``differences`` is a list
        of dicts, one per non-equal opcode of a case-sensitive comparison,
        with keys ``type``, ``original_pos``, ``found_pos``,
        ``original_text`` and ``found_text``.
    """
    print("\n" + "=" * 80)
    print("TEXT COMPARISON: Original Chunk vs Fuzzy Find Result")
    print("=" * 80)

    print(f"\nOriginal chunk length: {len(original_chunk)} characters")
    print(f"Found text length: {len(found_text)} characters")
    print(f"Found at position: {max_pos}")

    # Calculate overall similarity
    similarity = SequenceMatcher(None, original_chunk.lower(), found_text.lower()).ratio()
    print(f"Overall similarity: {similarity:.4f} ({similarity*100:.2f}%)")

    # Show first 200 characters of each. The preview is sliced so the output
    # matches the heading instead of dumping the whole text.
    print(f"\nOriginal chunk (first 200 chars):")
    print(f"'{_preview(original_chunk, 200)}'")

    print(f"\nFound text (first 200 chars):")
    print(f"'{_preview(found_text, 200)}'")

    # Character-by-character analysis for first 100 characters
    print(f"\nCharacter-by-character comparison (first 100 chars):")
    print("Original: " + _mark_mismatches(original_chunk, found_text, 100))
    print("Found: " + _mark_mismatches(found_text, original_chunk, 100))

    # Analyze differences (case-sensitive, unlike the similarity above)
    matcher = SequenceMatcher(None, original_chunk, found_text)
    differences = [
        {
            'type': tag,
            'original_pos': (i1, i2),
            'found_pos': (j1, j2),
            'original_text': original_chunk[i1:i2],
            'found_text': found_text[j1:j2],
        }
        for tag, i1, i2, j1, j2 in matcher.get_opcodes()
        if tag != 'equal'
    ]

    print(f"\nFound {len(differences)} differences:")
    for number, diff in enumerate(differences[:10], start=1):  # Show first 10 differences
        o_start, o_end = diff['original_pos']
        f_start, f_end = diff['found_pos']
        print(f"{number}. {diff['type'].upper()} at original[{o_start}:{o_end}] -> found[{f_start}:{f_end}]")
        if diff['original_text']:
            print(f" Original: '{_preview(diff['original_text'], 50)}'")
        if diff['found_text']:
            print(f" Found: '{_preview(diff['found_text'], 50)}'")

    if len(differences) > 10:
        print(f" ... and {len(differences) - 10} more differences")

    return similarity, differences
|
| 142 |
-
|
| 143 |
-
def run_fuzzy_analysis():
|
| 144 |
-
"""
|
| 145 |
-
Main function to run the fuzzy find analysis.
|
| 146 |
-
You can modify the markdown_text and chunk_text variables below.
|
| 147 |
-
"""
|
| 148 |
-
|
| 149 |
-
# TODO: Replace these with your actual markdown content and chunk
|
| 150 |
-
markdown_text = """# An improved method for mobile characterisation of $\\delta^{13} \\mathrm{CH}_{4}$ source signatures and its application in Germany \n\nAntje Hoheisel ${ }^{1}$, Christiane Yeman ${ }^{1, a}$, Florian Dinger ${ }^{1,2}$, Henrik Eckhardt ${ }^{1}$, and Martina Schmidt ${ }^{1}$<br>${ }^{1}$ Institute of Environmental Physics, Heidelberg University, Heidelberg, Germany<br>${ }^{2}$ Max Planck Institute for Chemistry, Mainz, Germany<br>${ }^{a}$ now at: Laboratory of Ion Beam Physics, ETH Zurich, Zurich, Switzerland\n\nCorrespondence: Antje Hoheisel (antje.hoheisel@iup.uni-heidelberg.de)\nReceived: 7 August 2018 - Discussion started: 1 October 2018\nRevised: 17 January 2019 - Accepted: 28 January 2019 - Published: 22 February 2019\n\n\n#### Abstract\n\nThe carbon isotopic signature $\\left(\\delta^{13} \\mathrm{CH}_{4}\\right)$ of several methane sources in Germany (around Heidelberg and in North Rhine-Westphalia) were characterised. Mobile measurements of the plume of $\\mathrm{CH}_{4}$ sources are carried out using an analyser based on cavity ring-down spectroscopy (CRDS). To achieve precise results a CRDS analyser, which measures methane $\\left(\\mathrm{CH}_{4}\\right)$, carbon dioxide $\\left(\\mathrm{CO}_{2}\\right)$ and their ${ }^{13} \\mathrm{C}$-to- ${ }^{12} \\mathrm{C}$ ratios, was characterised especially with regard to cross sensitivities of composition differences of the gas matrix in air samples or calibration tanks. The two most important gases which affect $\\delta^{13} \\mathrm{CH}_{4}$ are water vapour $\\left(\\mathrm{H}_{2} \\mathrm{O}\\right)$ and ethane $\\left(\\mathrm{C}_{2} \\mathrm{H}_{6}\\right)$. To avoid the cross sensitivity with $\\mathrm{H}_{2} \\mathrm{O}$, the air is dried with a Nafion dryer during mobile measurements. $\\mathrm{C}_{2} \\mathrm{H}_{6}$ is typically abundant in natural gases and thus in methane plumes or samples originating from natural gas. 
$\\mathrm{A}_{2} \\mathrm{H}_{6}$ correction and calibration are essential to obtain accurate $\\delta^{13} \\mathrm{CH}_{4}$ results, which can deviate by up to $3 \\%$ depending on whether a $\\mathrm{C}_{2} \\mathrm{H}_{6}$ correction is applied.\n\nThe isotopic signature is determined with the Miller-Tans approach and the York fitting method. During 21 field campaigns the mean $\\delta^{13} \\mathrm{CH}_{4}$ signatures of three dairy farms $\\left(-63.9 \\pm 0.9 \\%_{e}\\right)$, a biogas plant $\\left(-62.4 \\pm 1.2 \\%_{e}\\right)$, a landfill $\\left(-58.7 \\pm 3.3 \\%_{e}\\right)$, a wastewater treatment plant $(-52.5 \\pm$ $1.4 \\%$ ), an active deep coal mine ( $-56.0 \\pm 2.3 \\%$ ) and two natural gas storage and gas compressor stations ( $-46.1 \\pm$ $0.8 \\%$ ) were recorded.\n\nIn addition, between December 2016 and November 2018 gas samples from the Heidelberg natural gas distribution network were measured with a mean $\\delta^{13} \\mathrm{CH}_{4}$ value of $-43.3 \\pm$ $0.8 \\%$. Contrary to previous measurements between 1991\n\n\n#### Abstract\n\nand 1996 by Levin et al. (1999), no strong seasonal cycle is shown.\n\n\n## 1 Introduction\n\nMethane $\\left(\\mathrm{CH}_{4}\\right)$ is the second most important anthropogenic greenhouse gas. The atmospheric growth rate of $\\mathrm{CH}_{4}$ has changed significantly during the last decades, stabilising at zero growth from 1999 to 2006 before beginning to increase again after 2007 (Dlugokencky et al., 2009). Several studies have focused on the recent $\\mathrm{CH}_{4}$ growth caused by changes in sources and sinks (Rigby et al., 2017; Turner et al., 2017).\n\nRecent studies by Schaefer et al. (2016), Rice et al. (2016) and Nisbet et al. (2016) have shown how the $\\delta^{13} \\mathrm{CH}_{4}$ measurements can help to understand the changes in global $\\mathrm{CH}_{4}$ increase rates and to assign the related source types. 
The stable carbon isotope ratio $\\left({ }^{13} \\mathrm{C} /{ }^{12} \\mathrm{C}\\right)$ of $\\mathrm{CH}_{4}$ sources varies due to the initial source material and the fractionation during production and release to the atmosphere. The source categories can be classified as pyrogenic (e.g. biomass burning), biogenic (e.g. wetlands and livestock) or thermogenic (e.g. a subcategory of fossil fuel extraction), which show different but also overlapping isotope ratio ranges. Various studies have shown that the assignment of isotopic signatures from different $\\mathrm{CH}_{4}$ sources remains uncertain due to large temporal variabilities and also regional specificities (e.g. Sherwood et al., 2017). This missing knowledge may result in large uncertainties when the $\\mathrm{CH}_{4}$ budget is determined on global or regional scales using isotope-based estimates. In addition to global studies, the use of $\\delta^{13} \\mathrm{CH}_{4}$ was already successfully"""
|
| 151 |
-
|
| 152 |
-
chunk_text = """## 1 Introduction\nMethane ($\mathrm{CH}_{4}$) is the second most important anthropogenic greenhouse gas. The atmospheric growth rate of $\mathrm{CH}_{4}$ has changed significantly during the last decades, stabilising at zero growth from 1999 to 2006 before beginning to increase again after 2007 (Dlugokencky et al., 2009). Several studies have focused on the recent $\mathrm{CH}_{4}$ growth caused by changes in sources and sinks (Rigby et al., 2017; Turner et al., 2017).\n\nRecent studies by Schaefer et al. (2016), Rice et al. (2016) and Nisbet et al. (2016) have shown how the $\delta^{13} \mathrm{CH}_{4}$ measurements can help to understand the changes in global $\mathrm{CH}_{4}$ increase rates and to assign the related source types. The stable carbon isotope ratio (${}^{13}\mathrm{C}$/${}^{12}\mathrm{C}$) of $\mathrm{CH}_{4}$ sources varies due to the initial source material and the fractionation during production and release to the atmosphere. The source categories can be classified as pyrogenic (e.g. biomass burning), biogenic (e.g. wetlands and livestock) or thermogenic (e.g. a subcategory of fossil fuel extraction), which show different but also overlapping isotope ratio ranges. Various studies have shown that the assignment of isotopic signatures from different $\mathrm{CH}_{4}$ sources remains uncertain due to large temporal variabilities and also regional specificities (e.g. Sherwood et al., 2017). This missing knowledge may result in large uncertainties when the $\mathrm{CH}_{4}$ budget is determined on global or regional scales using isotope-based estimates. In addition to global studies, the use of $\delta^{13}\mathrm{CH}_{4}$ was already successfully"""
|
| 153 |
-
|
| 154 |
-
print("Analyzing fuzzy matching ratios...")
|
| 155 |
-
print(f"Markdown text length: {len(markdown_text)} characters")
|
| 156 |
-
print(f"Chunk text length: {len(chunk_text)} characters")
|
| 157 |
-
|
| 158 |
-
# Run the analysis
|
| 159 |
-
positions, ratios = analyze_fuzzy_ratios(markdown_text, chunk_text)
|
| 160 |
-
|
| 161 |
-
# Create the plot
|
| 162 |
-
fig, max_ratio, max_pos = plot_ratio_distribution(positions, ratios, chunk_text)
|
| 163 |
-
|
| 164 |
-
# Print statistics
|
| 165 |
-
print(f"\nStatistics:")
|
| 166 |
-
print(f"Maximum similarity ratio: {max_ratio:.3f}")
|
| 167 |
-
print(f"Maximum ratio position: {max_pos}")
|
| 168 |
-
print(f"Number of positions above 0.8 threshold: {sum(1 for r in ratios if r > 0.8)}")
|
| 169 |
-
print(f"Mean ratio: {np.mean(ratios):.3f}")
|
| 170 |
-
print(f"Standard deviation: {np.std(ratios):.3f}")
|
| 171 |
-
|
| 172 |
-
# Test the original fuzzy_find function
|
| 173 |
-
result = fuzzy_find(markdown_text, chunk_text)
|
| 174 |
-
print(f"\nOriginal fuzzy_find result: {result}")
|
| 175 |
-
if result is not None:
|
| 176 |
-
print(f"Found match at position {result}")
|
| 177 |
-
else:
|
| 178 |
-
print("No match found above 0.8 threshold")
|
| 179 |
-
|
| 180 |
-
# Compare the found text with the original chunk
|
| 181 |
-
if max_ratio > 0: # If we found any match
|
| 182 |
-
found_text = markdown_text[max_pos:max_pos + len(chunk_text)]
|
| 183 |
-
text_similarity, differences = compare_texts(chunk_text, found_text, max_pos)
|
| 184 |
-
print(f"\nDetailed comparison similarity: {text_similarity:.4f}")
|
| 185 |
-
print(f"Number of character differences: {len(differences)}")
|
| 186 |
-
|
| 187 |
-
plt.show()
|
| 188 |
-
return positions, ratios, max_ratio, max_pos
|
| 189 |
-
|
| 190 |
-
if __name__ == "__main__":
|
| 191 |
-
# Run the analysis
|
| 192 |
-
positions, ratios, max_ratio, max_pos = run_fuzzy_analysis()
|
| 193 |
-
|
| 194 |
-
#%%
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|