Spaces:
Running
on
Zero
Running
on
Zero
| """Citation helpers (numbered citations like [1], [2], ...).""" | |
| from __future__ import annotations | |
| import re | |
| from typing import Any, Dict, List, Tuple | |
| class CitationManager: | |
| def __init__(self, *, max_content_length: int = 900): | |
| self.max_content_length = int(max_content_length) | |
| self.documents: List[Dict[str, Any]] = [] | |
| self.doc_id_to_index: Dict[str, int] = {} | |
| def clear(self) -> None: | |
| self.documents = [] | |
| self.doc_id_to_index = {} | |
| def add_document(self, document: Dict[str, Any]) -> int: | |
| doc_id = document.get("doc_id") or "" | |
| if doc_id in self.doc_id_to_index: | |
| return self.doc_id_to_index[doc_id] | |
| self.documents.append(document) | |
| idx = len(self.documents) | |
| self.doc_id_to_index[doc_id] = idx | |
| return idx | |
| def add_documents(self, documents: List[Dict[str, Any]]) -> List[int]: | |
| return [self.add_document(d) for d in documents] | |
| def parse_citations_in_text(text: str) -> List[int]: | |
| matches = re.findall(r"\[(\d+)\]", text or "") | |
| out = [] | |
| for m in matches: | |
| try: | |
| out.append(int(m)) | |
| except Exception: | |
| continue | |
| return out | |
| def validate_citations(self, text: str) -> Tuple[bool, List[int]]: | |
| cited = self.parse_citations_in_text(text or "") | |
| invalid = [i for i in cited if i < 1 or i > len(self.documents)] | |
| return (len(invalid) == 0), invalid | |
| def get_statistics(self) -> Dict[str, Any]: | |
| counts: Dict[str, int] = {} | |
| for d in self.documents: | |
| st = d.get("source_type", "unknown") or "unknown" | |
| counts[st] = counts.get(st, 0) + 1 | |
| return {"total": len(self.documents), "source_type_counts": counts} | |