"""Document viewer component for displaying PDFs with citation highlighting.

NOTE(review): the string literals in this module contain no tags even though
the docstrings describe their values as HTML. The markup appears to have been
lost from this copy of the file; the visible text is kept verbatim and the
tags should be restored from version control.
"""

import json
from html import escape
from typing import Dict, List, Optional


class DocumentViewer:
    """Handle document viewing with citation highlighting."""

    # Snippets longer than this many characters are cut and marked with "...".
    _SNIPPET_LIMIT = 150

    def __init__(self):
        """Initialize the viewer with no current document and no highlights."""
        self.current_document = None
        self.current_highlights = []

    def render_document(self, html_content: str, filename: str,
                        highlight_paragraphs: Optional[List[str]] = None) -> str:
        """
        Render a document with optional paragraph highlighting.

        Args:
            html_content: HTML content of the document (inserted as-is)
            filename: Name of the document
            highlight_paragraphs: List of paragraph IDs to highlight

        Returns:
            The control bar followed by the (possibly highlighted) content,
            or the empty-state text when ``html_content`` is empty.
        """
        if not html_content:
            return self._render_empty_state()

        for para_id in highlight_paragraphs or []:
            # NOTE(review): the original search/replace arguments are not
            # recoverable from this copy of the file. This assumes paragraphs
            # carry an id="<paragraph id>" attribute and are highlighted by
            # adding a "highlighted" class -- confirm against the document
            # HTML producer and the stylesheet.
            html_content = html_content.replace(
                f'id="{para_id}"',
                f'id="{para_id}" class="highlighted"',
            )

        # NOTE(review): any statements between the highlighting loop and the
        # returned template (and the template's leading markup) are missing
        # from this copy; only the two visible placeholders are reproduced.
        controls = self._create_viewer_controls(filename)
        return f"\n{controls}\n\n{html_content}\n"

    def _create_viewer_controls(self, filename: str) -> str:
        """
        Create the viewer control bar.

        Args:
            filename: Current document filename

        Returns:
            Control-bar text containing the HTML-escaped filename.
        """
        # The filename comes from an uploaded document, so escape it before
        # placing it in output that is rendered as HTML.
        return f"\n📄 {escape(str(filename))}\n"

    def _render_empty_state(self) -> str:
        """
        Render the empty state shown when no document is selected.

        Returns:
            The fixed empty-state text.
        """
        return (
            "\n📄\n"
            "\nTidak Ada Dokumen\n"
            "\nUpload dokumen PDF untuk melihat konten dan sitasi di sini.\n"
            "\n"
        )

    def create_citation_link(self, filename: str, paragraph_ids: List[str],
                             snippet: str, page: Optional[int] = None) -> str:
        """
        Create a citation entry for the chat.

        Args:
            filename: Source document filename
            paragraph_ids: List of paragraph IDs this citation refers to
            snippet: Text snippet to show (cut to 150 characters)
            page: Page number (optional); omitted from the output when falsy

        Returns:
            Citation text with the escaped filename, optional page marker and
            the quoted, escaped snippet.
        """
        # NOTE(review): the original computed
        #     paragraph_ids[0] if paragraph_ids else "unknown"
        # as the citation target, but the markup that consumed it is not
        # present in this copy, so it is not emitted here.
        page_info = f" (Hal. {page})" if page else ""

        text = snippet or ""  # tolerate a missing/None chunk text
        shown = escape(text[:self._SNIPPET_LIMIT])
        # Only mark the snippet as shortened when something was cut off.
        ellipsis = "..." if len(text) > self._SNIPPET_LIMIT else ""

        return (
            f"\n📄 {escape(str(filename))}{page_info}\n"
            f'"{shown}{ellipsis}"\n'
        )

    @staticmethod
    def _coerce_list(value) -> list:
        """Return ``value`` as a list, decoding JSON strings; ``[]`` otherwise."""
        if isinstance(value, str):
            try:
                value = json.loads(value)
            except ValueError:  # json.JSONDecodeError subclasses ValueError
                return []
        return value if isinstance(value, list) else []

    def format_sources_with_links(self, sources: List[Dict]) -> tuple[str, List[str]]:
        """
        Format sources as citations.

        Each source is a dict read through the keys ``filename``,
        ``chunk_text``, ``paragraph_ids`` and ``pages``; the last two may be
        either a list or a JSON-encoded list, and anything else is treated as
        empty.

        Args:
            sources: List of source metadata from RAG pipeline

        Returns:
            Tuple of (formatted string, unique paragraph IDs to highlight in
            first-seen order). ``("", [])`` when ``sources`` is empty.
        """
        if not sources:
            return "", []

        all_paragraph_ids = []
        parts = ["\n", "\n\n📚 Sumber Referensi:\n\n"]

        for source in sources:
            paragraph_ids = self._coerce_list(source.get('paragraph_ids', '[]'))
            pages = self._coerce_list(source.get('pages', '[]'))

            # Track all paragraph IDs for highlighting.
            all_paragraph_ids.extend(paragraph_ids)

            parts.append(self.create_citation_link(
                filename=source.get('filename', 'Unknown'),
                paragraph_ids=paragraph_ids,
                snippet=source.get('chunk_text', ''),
                page=pages[0] if pages else None,
            ))

        parts.append("\n")
        # dict.fromkeys de-duplicates while keeping a stable, first-seen order
        # (a plain set would return the IDs in arbitrary order).
        return "".join(parts), list(dict.fromkeys(all_paragraph_ids))

    def create_document_selector(self, documents: List[Dict],
                                 current_doc: Optional[str] = None) -> str:
        """
        Create the selector for documents.

        Args:
            documents: List of document metadata
            current_doc: Currently selected document filename

        Returns:
            A "no documents available" message when ``documents`` is empty,
            otherwise the selector string.
        """
        if not documents:
            return "\n\nBelum ada dokumen yang tersedia\n\n"

        # NOTE(review): whatever markup the selector contained (and any use of
        # `documents` / `current_doc` to build it) is missing from this copy
        # of the file; only the surviving literal is reproduced.
        return "\n"