File size: 25,210 Bytes
0a4529c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
# DEPENDENCIES
from typing import List
from typing import Optional
from collections import defaultdict
from config.settings import get_settings
from config.models import ChunkWithScore
from config.models import DocumentChunk
from config.logging_config import get_logger
from utils.error_handler import handle_errors
from chunking.token_counter import get_token_counter
from utils.error_handler import ContextAssemblyError
from retrieval.citation_tracker import CitationTracker


# Setup Settings and Logging
# Module-level handles shared by everything in this file: `settings` supplies
# CONTEXT_WINDOW for the default token budget, and `logger` is the logger that
# every ContextAssembler instance stores as `self.logger`
settings = get_settings()
logger   = get_logger(__name__)


class ContextAssembler:
    """
    Context assembly and optimization: Assembles retrieved chunks into optimal context
    for LLM processing with token limits and quality optimization
    """
    def __init__(self, max_context_tokens: int = None, strategy: str = "score_based"):
        """
        Initialize context assembler
        
        Arguments:
        ----------
            max_context_tokens { int } : Maximum tokens for assembled context (default from settings)
            
            strategy           { str } : Assembly strategy ('score_based', 'diversity', 'sequential')
        """
        self.logger             = logger
        self.settings           = get_settings()
        self.max_context_tokens = max_context_tokens or (settings.CONTEXT_WINDOW - 1000)
        self.strategy           = strategy
        self.citation_tracker   = CitationTracker()
        
        # Strategy configurations
        self.strategy_configs   = {"score_based" : {"diversity_penalty": 0.1, "min_chunk_score": 0.1},
                                   "diversity"   : {"diversity_penalty": 0.3, "min_chunk_score": 0.05},
                                   "sequential"  : {"diversity_penalty": 0.0, "min_chunk_score": 0.0},
                                  }
        
        self.logger.info(f"ContextAssembler initialized: max_tokens={self.max_context_tokens}, strategy={strategy}")
    

    def assemble_context(self, chunks: List[ChunkWithScore], query: str = "", include_citations: bool = True, format_for_llm: bool = True) -> str:
        """
        Assemble context from retrieved chunks
        
        Arguments:
        ----------
            chunks             { list } : List of retrieved chunks

            query              { str }  : Original query (for relevance optimization)
            
            include_citations  { bool } : Include citation markers in context
            
            format_for_llm     { bool } : Format context for LLM consumption
        
        Returns:
        --------
                    { str }             : Assembled context string
        """
        if not chunks:
            self.logger.warning("No chunks provided for context assembly")
            return ""
        
        try:
            self.logger.info(f"Starting context assembly with {len(chunks)} chunks")
            
            # Filter and sort chunks based on strategy
            filtered_chunks = self._filter_chunks(chunks = chunks)
            self.logger.info(f"After filtering: {len(filtered_chunks)} chunks")
            
            # Ensure we have chunks after filtering
            if not filtered_chunks:
                self.logger.error("All chunks filtered out - using top chunk from original list")
                filtered_chunks = [chunks[0]]
            
            sorted_chunks = self._sort_chunks(chunks = filtered_chunks, 
                                              query  = query,
                                             )
            
            # Select chunks within token limit
            selected_chunks = self._select_chunks_by_tokens(chunks = sorted_chunks)
            self.logger.info(f"After token selection: {len(selected_chunks)} chunks selected")
            
            # Validate selection results with progressive fallback
            if not selected_chunks:
                self.logger.warning("Token selection returned 0 chunks - using progressive fallback")
                
                # Fallback 1 - Try with higher token budget (allow 10% overflow)
                if sorted_chunks:
                    overflow_budget         = int(self.max_context_tokens * 1.10)
                    self.logger.info(f"Fallback 1: Allowing {overflow_budget} tokens (10% overflow)")
                    
                    old_budget              = self.max_context_tokens
                    self.max_context_tokens = overflow_budget
                    
                    selected_chunks         = self._select_chunks_by_tokens(chunks = sorted_chunks)
                    self.max_context_tokens = old_budget
                
                # Fallback 2 - Force include top chunks regardless of token count
                if not selected_chunks and sorted_chunks:
                    self.logger.warning("Fallback 2: Force including top 3 chunks")

                    selected_chunks = sorted_chunks[:min(3, len(sorted_chunks))]
                
                # Fallback 3 - Use first available chunk
                if not selected_chunks and chunks:
                    self.logger.error("Fallback 3: Emergency - using only first chunk")
                    
                    selected_chunks = [chunks[0]]
                
                # Complete failure
                if not selected_chunks:
                    self.logger.error("All fallbacks exhausted - no chunks available")
                    
                    raise ContextAssemblyError("No valid chunks available after all selection attempts")
            
            # Assemble context
            if format_for_llm:
                context = self._format_for_llm(chunks            = selected_chunks, 
                                               include_citations = include_citations,
                                              )
            
            else:
                context = self._format_simple(chunks            = selected_chunks,       
                                              include_citations = include_citations,
                                             )
            
            # Validate assembled context
            if not context or not context.strip():
                self.logger.error("Assembled context is empty")
                
                raise ContextAssemblyError("Context assembly produced empty result")
            
            context_tokens = self._count_tokens(text = context)
            
            self.logger.info(f"Assembled context: {len(selected_chunks)} chunks, {context_tokens} tokens, {len(context)} chars")
            
            # Warn if context exceeds limit
            if (context_tokens > self.max_context_tokens):
                overflow_pct = ((context_tokens - self.max_context_tokens) / self.max_context_tokens) * 100

                self.logger.warning(f"Context exceeds limit by {overflow_pct:.1f}% ({context_tokens} > {self.max_context_tokens})")
            
            return context
        
        except ContextAssemblyError:
            # Re-raise context assembly errors
            raise
        
        except Exception as e:
            self.logger.error(f"Context assembly failed with unexpected error: {repr(e)}", exc_info = True)
            
            # Emergency fallback: return first chunk text only
            if (chunks and len(chunks) > 0):
                self.logger.warning("Emergency fallback: returning first chunk text only")
                
                return chunks[0].chunk.text
            
            # No chunks available at all
            raise ContextAssemblyError(f"Context assembly failed with no fallback available: {repr(e)}")


    def _filter_chunks(self, chunks: List[ChunkWithScore]) -> List[ChunkWithScore]:
        """
        Filter chunks based on quality and strategy
        """
        if not chunks:
            return []
        
        strategy_config = self.strategy_configs.get(self.strategy, self.strategy_configs["score_based"])
        min_score       = strategy_config["min_chunk_score"]
        
        # Don't filter if all scores are below threshold
        all_below_threshold = all(chunk.score < min_score for chunk in chunks)
        
        if all_below_threshold:
            self.logger.warning(f"All chunks below min_score {min_score}, keeping top chunks anyway")
            # Keep at least top 5 chunks regardless of score
            filtered = sorted(chunks, key = lambda x: x.score, reverse = True)[:5]
        
        else:
            filtered = [chunk for chunk in chunks if chunk.score >= min_score]
        
        self.logger.info(f"Filtered {len(chunks)} -> {len(filtered)} chunks (min_score={min_score})")
        
        # Remove very similar chunks if diversity is important
        if (strategy_config["diversity_penalty"] > 0):
            filtered = self._apply_diversity_filter(filtered, strategy_config["diversity_penalty"])
        
        return filtered
    

    def _apply_diversity_filter(self, chunks: List[ChunkWithScore], diversity_penalty: float) -> List[ChunkWithScore]:
        """
        Apply diversity filtering to reduce redundancy - FIXED
        
        Arguments:
        ----------
            chunks            { list }  : Chunks to filter
            
            diversity_penalty { float } : Penalty factor for same-document chunks
        
        Returns:
        --------
                       { list }         : Diversified chunks
        """
        if (len(chunks) <= 1):
            return chunks
        
        # Simple diversity: penalize chunks from same document
        document_scores = dict()
        
        for chunk in chunks:
            doc_id = chunk.chunk.document_id
            if doc_id not in document_scores:
                document_scores[doc_id] = []
            
            document_scores[doc_id].append(chunk.score)
        
        # Adjust scores based on document diversity
        diversified = list()

        for chunk in chunks:
            doc_id          = chunk.chunk.document_id
            doc_chunk_count = len(document_scores[doc_id])
            
            # Penalize if multiple chunks from same document
            penalty         = diversity_penalty * (doc_chunk_count - 1)
            adjusted_score  = max(0.0, chunk.score - penalty)
            
            if (adjusted_score > 0):
                diversified_chunk = ChunkWithScore(chunk            = chunk.chunk,
                                                   score            = adjusted_score,
                                                   rank             = chunk.rank,
                                                   retrieval_method = chunk.retrieval_method + "_diversified"
                                                  )

                diversified.append(diversified_chunk)
        
        # Re-sort by adjusted scores
        diversified.sort(key = lambda x: x.score, reverse = True)
        
        # Re-normalize scores to maintain 0-1 range after penalty
        if diversified:
            max_score = max(chunk.score for chunk in diversified)
            
            if (max_score > 0):
                for chunk in diversified:
                    chunk.score = chunk.score / max_score
        
        return diversified
    

    def _sort_chunks(self, chunks: List[ChunkWithScore], query: str) -> List[ChunkWithScore]:
        """
        Sort chunks based on strategy
        """
        if (self.strategy == "sequential"):
            # Sort by document order and position
            chunks.sort(key = lambda x: (x.chunk.document_id, 
                                         x.chunk.page_number or 0,
                                         x.chunk.chunk_index
                                        )
                       )

        else:
            # Default: sort by score (already sorted by retrieval)
            chunks.sort(key = lambda x: x.score, reverse = True)
        
        return chunks
    

    def _select_chunks_by_tokens(self, chunks: List[ChunkWithScore]) -> List[ChunkWithScore]:
        """
        Select chunks that fit within token limit - IMPROVED VERSION
        
        Arguments:
        ----------
            chunks { list } : Chunks to select from
        
        Returns:
        --------
            { list }        : Selected chunks within token limit
        """
        if not chunks:
            self.logger.error("No chunks provided to _select_chunks_by_tokens")
            return []
        
        selected     = list()
        total_tokens = 0
        
        self.logger.info(f"Token selection: {len(chunks)} chunks, max={self.max_context_tokens} tokens")
        
        # Reserve tokens for system prompt and formatting overhead
        formatting_overhead = 200  
        available_tokens    = self.max_context_tokens - formatting_overhead
        
        self.logger.debug(f"Available tokens after overhead: {available_tokens}")
        
        for i, chunk in enumerate(chunks):
            # Get or calculate chunk tokens
            chunk_tokens = chunk.chunk.token_count
            
            if (chunk_tokens is None) or (chunk_tokens <= 0):
                chunk_text = chunk.chunk.text if chunk.chunk.text else ""
                
                if not chunk_text:
                    self.logger.warning(f"Chunk {i} has no text content, skipping")
                    continue
                
                chunk_tokens = self._count_tokens(text = chunk_text)
                self.logger.debug(f"Chunk {i} calculated: {chunk_tokens} tokens from {len(chunk_text)} chars")
            
            # Reserve tokens for chunk separators and citations
            chunk_formatting = 25 
            total_needed     = total_tokens + chunk_tokens + chunk_formatting
            
            if (total_needed <= available_tokens):
                selected.append(chunk)

                total_tokens += chunk_tokens + chunk_formatting
                
                self.logger.debug(f"Chunk {i}: score={chunk.score:.3f}, tokens={chunk_tokens}, total={total_tokens}/{available_tokens}")
            
            else:
                # Calculate remaining space
                remaining = available_tokens - total_tokens - chunk_formatting
                
                self.logger.debug(f"Chunk {i} exceeds limit: needs {chunk_tokens}, have {remaining} remaining")
                
                # Try partial chunk if we have reasonable space (at least 200 tokens)
                if (remaining >= 200):
                    self.logger.info(f"Attempting partial chunk {i} with {remaining} available tokens")
                    
                    partial_chunk = self._create_partial_chunk(chunk             = chunk,
                                                               available_tokens  = remaining,
                                                              )
                    
                    if partial_chunk:
                        selected.append(partial_chunk)
                        partial_tokens = self._count_tokens(text = partial_chunk.chunk.text)
                        total_tokens  += partial_tokens + chunk_formatting
                        
                        self.logger.info(f"Added partial chunk {i}: {partial_tokens} tokens")
                
                # Stop adding chunks - no more space
                self.logger.info(f"Stopping chunk selection at index {i}")
                break
        
        # Log selection summary
        utilization = (total_tokens / self.max_context_tokens * 100) if (self.max_context_tokens > 0) else 0
        
        self.logger.info(f"Token selection complete:")
        self.logger.info(f"- Selected: {len(selected)}/{len(chunks)} chunks")
        self.logger.info(f"- Tokens: {total_tokens}/{self.max_context_tokens} ({utilization:.1f}% utilization)")
        
        # Warning if selection is poor
        if ((len(selected) == 0) and (len(chunks) > 0)):
            self.logger.error(f"- ZERO chunks selected from {len(chunks)} available!")
            self.logger.error(f"- Max tokens: {self.max_context_tokens}")
            self.logger.error(f"- Available after overhead: {available_tokens}")
            self.logger.error(f"- First chunk tokens: {chunks[0].chunk.token_count or 'unknown'}")
            
            # Diagnostic: check if first chunk is too large
            if chunks[0].chunk.text:
                first_chunk_tokens = self._count_tokens(chunks[0].chunk.text)
                self.logger.error(f"- First chunk actual tokens: {first_chunk_tokens}")
                
                if first_chunk_tokens > available_tokens:
                    self.logger.error(f"- First chunk ({first_chunk_tokens} tokens) exceeds available space ({available_tokens} tokens)")
        
        return selected
    

    def _create_partial_chunk(self, chunk: ChunkWithScore, available_tokens: int) -> Optional[ChunkWithScore]:
        """
        Create a partial chunk that fits within available tokens
        """
        full_text    = chunk.chunk.text
        
        # Try to truncate at sentence boundary
        sentences    = full_text.split('. ')
        partial_text = ""
        
        for sentence in sentences:
            test_text   = partial_text + sentence + ". "
            test_tokens = self._count_tokens(text = test_text)
            
            if (test_tokens <= available_tokens):
                partial_text = test_text

            else:
                break
        
        if partial_text:
            partial_chunk_obj = DocumentChunk(chunk_id      = chunk.chunk.chunk_id + "_partial",
                                              document_id   = chunk.chunk.document_id,
                                              text          = partial_text.strip(),
                                              embedding     = chunk.chunk.embedding,
                                              chunk_index   = chunk.chunk.chunk_index,
                                              start_char    = chunk.chunk.start_char,
                                              end_char      = chunk.chunk.start_char + len(partial_text.strip()),
                                              page_number   = chunk.chunk.page_number,
                                              section_title = chunk.chunk.section_title,
                                              token_count   = self._count_tokens(text = partial_text.strip()),
                                              metadata      = chunk.chunk.metadata,
                                             )
            # Create partial chunk
            partial_chunk     = ChunkWithScore(chunk            = partial_chunk_obj,
                                               score            = chunk.score * 0.8,
                                               rank             = chunk.rank,
                                               retrieval_method = chunk.retrieval_method + "_partial",
                                              )

            
            return partial_chunk
        
        return None
    

    def _format_for_llm(self, chunks: List[ChunkWithScore], include_citations: bool) -> str:
        """
        Format context for LLM consumption with citations
        """
        context_parts = list()

        for i, chunk_with_score in enumerate(chunks, 1):
            chunk           = chunk_with_score.chunk
            
            # Build citation marker
            citation_marker = f"[{i}]" if include_citations else ""
            
            # Build source info
            source_info     = list()

            if chunk.page_number:
                source_info.append(f"Page {chunk.page_number}")
            
            if chunk.section_title:
                source_info.append(f"Section: {chunk.section_title}")
            
            source_str = f"({', '.join(source_info)})" if source_info else ""
            
            # Format chunk
            if include_citations and source_info:
                chunk_text = f"{citation_marker} {source_str}\n{chunk.text}"
            
            elif include_citations:
                chunk_text = f"{citation_marker}\n{chunk.text}"
            
            else:
                chunk_text = chunk.text
            
            context_parts.append(chunk_text)
        
        return "\n\n".join(context_parts)
    

    def _format_simple(self, chunks: List[ChunkWithScore], include_citations: bool) -> str:
        """
        Simple formatting without extensive metadata
        """
        context_parts = list()

        for i, chunk_with_score in enumerate(chunks, 1):
            chunk = chunk_with_score.chunk
            
            if include_citations:
                context_parts.append(f"[{i}] {chunk.text}")
            
            else:
                context_parts.append(chunk.text)
        
        return "\n\n".join(context_parts)
    

    def _count_tokens(self, text: str) -> int:
        """
        Count tokens in text with conservative fallback
        
        Arguments:
        ----------
            text { str } : Text to count tokens for
        
        Returns:
        --------
            { int }      : Token count
        """
        if not text:
            return 0
        
        try:
            token_counter = get_token_counter()

            return token_counter.count_tokens(text)
        
        except Exception as e:
            # Conservative fallback calculation for technical text
            self.logger.debug(f"Token counter error, using conservative approximation: {repr(e)}")
            
            # More accurate approximation for technical/scientific text: Count words (split by whitespace)
            words = text.split()
            
            # Technical text has more subword tokenization: Average: 1 word ≈ 1.8 tokens for technical English (conservative)
            estimated_tokens = int(len(words) * 1.8)
            
            # Add overhead for punctuation, numbers, special chars (15%)
            estimated_tokens = int(estimated_tokens * 1.15)
            
            # Add safety margin (10%)
            estimated_tokens = int(estimated_tokens * 1.10)
            
            # Ensure minimum reasonable value
            estimated_tokens = max(10, estimated_tokens)
            
            self.logger.debug(f"Conservative estimate: {len(words)} words → {estimated_tokens} tokens")
            
            return estimated_tokens
    

    def optimize_context_quality(self, context: str, chunks: List[ChunkWithScore]) -> str:
        """
        Optimize context quality by removing redundancies and improving flow
        """
        # Remove duplicate sentences
        sentences        = context.split('. ')
        unique_sentences = list()

        for sentence in sentences:
            sentence_clean = sentence.strip()
            
            if (sentence_clean and (sentence_clean not in unique_sentences)):
                unique_sentences.append(sentence_clean)
        
        optimized = '. '.join(unique_sentences)
        
        # Ensure proper citation consistency
        if '[' in optimized:
            optimized = self.citation_tracker.ensure_citation_consistency(optimized, chunks)
        
        return optimized
    

    def get_context_statistics(self, context: str, chunks: List[ChunkWithScore]) -> dict:
        """
        Get statistics about assembled context
        """
        token_count    = self._count_tokens(text = context)
        char_count     = len(context)
        
        # Citation statistics
        citation_stats = self.citation_tracker.get_citation_statistics(context, chunks)
        
        # Source diversity
        source_docs    = set(chunk.chunk.document_id for chunk in chunks)
        source_pages   = set(chunk.chunk.page_number for chunk in chunks if chunk.chunk.page_number)
        
        return {"total_tokens"      : token_count,
                "total_chars"       : char_count,
                "chunk_count"       : len(chunks),
                "source_documents"  : len(source_docs),
                "source_pages"      : len(source_pages),
                "token_utilization" : (token_count / self.max_context_tokens * 100) if self.max_context_tokens > 0 else 0,
                "citation_stats"    : citation_stats,
                "strategy"          : self.strategy,
               }


# Module-wide ContextAssembler, created lazily (stays None until first requested)
_context_assembler = None


def get_context_assembler() -> ContextAssembler:
    """
    Return the shared ContextAssembler, constructing it with default arguments on first use

    Returns:
    --------
        { ContextAssembler } : The same instance on every call after the first
    """
    global _context_assembler

    # Fast path: the shared instance already exists
    if _context_assembler is not None:
        return _context_assembler

    _context_assembler = ContextAssembler()

    return _context_assembler


@handle_errors(error_type = ContextAssemblyError, log_error = True, reraise = False)
def assemble_context_for_llm(chunks: List[ChunkWithScore], query: str = "", **kwargs) -> str:
    """
    Convenience function: assemble context using the shared ContextAssembler

    Arguments:
    ----------
        chunks   { list } : Retrieved chunks to assemble

        query    { str }  : Original query

        **kwargs          : Forwarded unchanged to ContextAssembler.assemble_context
                            (e.g. include_citations, format_for_llm)

    Returns:
    --------
            { str }       : Assembled context string

    NOTE(review): the decorator is configured with reraise = False, so failures are presumably
    logged and swallowed by handle_errors rather than propagated - confirm what it returns then
    """
    return get_context_assembler().assemble_context(chunks, query, **kwargs)