"""
Document viewer component for displaying PDFs with citation highlighting
"""
from typing import List, Dict, Optional
import json
class DocumentViewer:
"""Handle document viewing with citation highlighting"""
def __init__(self):
"""Initialize document viewer"""
self.current_document = None
self.current_highlights = []
def render_document(self, html_content: str, filename: str, highlight_paragraphs: List[str] = None) -> str:
"""
Render a document's HTML, optionally highlighting selected paragraphs.

A falsy ``html_content`` short-circuits to the placeholder returned by
``_render_empty_state()``. Otherwise every ID in ``highlight_paragraphs``
triggers one ``str.replace`` pass over ``html_content``, and the returned
value combines ``_create_viewer_controls(filename)`` with the document HTML.

NOTE(review): the arguments of the ``replace`` call and the opening of the
``enhanced_html`` template look truncated in this copy of the file; confirm
them against the original before relying on the exact markup produced.

Args:
html_content: HTML content of the document; empty or None yields the empty state
filename: Name of the document, passed to the viewer control bar
highlight_paragraphs: Paragraph IDs to highlight; None or empty skips highlighting
Returns:
Enhanced HTML with highlighting, or the empty-state markup
"""
# Nothing to show: fall back to the placeholder instead of an empty viewer
if not html_content:
return self._render_empty_state()
# Highlighting is optional; None and [] both skip this step
if highlight_paragraphs:
for para_id in highlight_paragraphs:
# One replace pass per paragraph ID; the result is rebound to html_content
html_content = html_content.replace(
f'
{self._create_viewer_controls(filename)}
{html_content}
"""
return enhanced_html
def _create_viewer_controls(self, filename: str) -> str:
"""
Create viewer control bar
Args:
filename: Current document filename
Returns:
HTML for controls
"""
return f"""
📄{filename}
"""
def _render_empty_state(self) -> str:
"""
Render empty state when no document is selected
Returns:
HTML for empty state
"""
return """
📄
Tidak Ada Dokumen
Upload dokumen PDF untuk melihat konten dan sitasi di sini.
"""
def create_citation_link(self, filename: str, paragraph_ids: List[str], snippet: str, page: Optional[int] = None) -> str:
    """
    Create the citation entry shown in the chat for one source.

    The entry consists of the document name (with an optional page label)
    followed by a quoted excerpt of the source text. Both the filename and
    the excerpt are HTML-escaped, since they come from uploaded documents.
    The excerpt is cut to 150 characters and an ellipsis is appended only
    when text was actually cut off.

    Args:
        filename: Source document filename
        paragraph_ids: Paragraph IDs this citation refers to. Accepted for
            interface compatibility; the markup built here does not
            reference them.
        snippet: Text excerpt to show; None is treated as an empty excerpt
        page: Page number (optional). A falsy value (None or 0) omits the
            page label.

    Returns:
        Markup for the citation entry.
    """
    # Local import keeps this block self-contained; the module itself does
    # not import ``html`` at file level.
    from html import escape

    max_snippet_chars = 150

    page_info = f" (Hal. {page})" if page else ""

    text = snippet or ""
    # Truncate first, escape second, so an entity is never cut in half.
    excerpt = escape(text[:max_snippet_chars])
    ellipsis = "..." if len(text) > max_snippet_chars else ""
    safe_name = escape(str(filename))

    return f'\n📄 {safe_name}{page_info}\n"{excerpt}{ellipsis}"\n'
def format_sources_with_links(self, sources: List[Dict]) -> tuple[str, List[str]]:
"""
Format sources as interactive citations
Args:
sources: List of source metadata from RAG pipeline
Returns:
Tuple of (HTML string, list of paragraph IDs to highlight)
"""
if not sources:
return "", []
all_paragraph_ids = []
html = "
"
html += "
📚 Sumber Referensi:
"
for i, source in enumerate(sources, 1):
filename = source.get('filename', 'Unknown')
chunk_text = source.get('chunk_text', '')
# Parse paragraph IDs if available
paragraph_ids_str = source.get('paragraph_ids', '[]')
try:
if isinstance(paragraph_ids_str, str):
paragraph_ids = json.loads(paragraph_ids_str)
else:
paragraph_ids = paragraph_ids_str if isinstance(paragraph_ids_str, list) else []
except:
paragraph_ids = []
# Parse pages
pages_str = source.get('pages', '[]')
try:
if isinstance(pages_str, str):
pages = json.loads(pages_str)
else:
pages = pages_str if isinstance(pages_str, list) else []
except:
pages = []
page = pages[0] if pages else None
# Track all paragraph IDs for highlighting
all_paragraph_ids.extend(paragraph_ids)
# Create citation link
html += self.create_citation_link(
filename=filename,
paragraph_ids=paragraph_ids,
snippet=chunk_text,
page=page
)
html += "
"
return html, list(set(all_paragraph_ids)) # Return unique paragraph IDs
def create_document_selector(self, documents: List[Dict], current_doc: str = None) -> str:
"""
Create dropdown selector for documents
Args:
documents: List of document metadata
current_doc: Currently selected document filename
Returns:
HTML for document selector
"""
if not documents:
return "