# ZhangNy's picture
# Add Space app files
# 75db650
"""Citation helpers (numbered citations like [1], [2], ...)."""
from __future__ import annotations
import re
from typing import Any, Dict, List, Tuple
class CitationManager:
    """Registry that maps documents to 1-based citation numbers.

    Documents are stored in insertion order; the citation number of a
    document is its position in ``documents`` plus one, so the first
    registered document is cited as ``[1]``.
    """

    def __init__(self, *, max_content_length: int = 900):
        """Create an empty manager.

        Args:
            max_content_length: Stored on the instance after coercion with
                ``int()``. No method visible in this class reads it;
                presumably it is consumed by callers -- TODO confirm.
        """
        self.max_content_length = int(max_content_length)
        # Registered documents; citation number N refers to documents[N - 1].
        self.documents: List[Dict[str, Any]] = []
        # Non-empty doc_id -> 1-based citation number.
        self.doc_id_to_index: Dict[str, int] = {}

    def clear(self) -> None:
        """Forget every registered document and its citation number."""
        self.documents = []
        self.doc_id_to_index = {}

    def add_document(self, document: Dict[str, Any]) -> int:
        """Register ``document`` and return its 1-based citation number.

        A document whose ``"doc_id"`` was already registered is not stored
        again; the number assigned the first time is returned instead.

        Documents with a missing or falsy ``"doc_id"`` cannot be told apart
        by id, so each one is stored and numbered separately rather than
        being collapsed onto the first id-less document.

        Args:
            document: Mapping describing the document; only ``"doc_id"`` is
                read here.

        Returns:
            The citation number (starting at 1) for the document.
        """
        doc_id = document.get("doc_id") or ""
        if doc_id:
            known = self.doc_id_to_index.get(doc_id)
            if known is not None:
                return known

        self.documents.append(document)
        number = len(self.documents)  # 1-based: taken after the append
        if doc_id:
            # Only real ids take part in de-duplication.
            self.doc_id_to_index[doc_id] = number
        return number

    def add_documents(self, documents: List[Dict[str, Any]]) -> List[int]:
        """Register several documents and return their citation numbers.

        The result has one entry per input document, in input order;
        duplicates (by ``"doc_id"``) yield the number assigned earlier.
        """
        return [self.add_document(doc) for doc in documents]

    @staticmethod
    def parse_citations_in_text(text: str) -> List[int]:
        """Return every ``[N]`` citation number in ``text``, in order.

        Repeated citations are kept. ``None`` or empty text yields an empty
        list.
        """
        numbers: List[int] = []
        for digits in re.findall(r"\[(\d+)\]", text or ""):
            try:
                numbers.append(int(digits))
            except ValueError:
                # int() can reject extremely long digit runs on interpreters
                # that limit integer string conversion; skip such markers.
                continue
        return numbers

    def validate_citations(self, text: str) -> Tuple[bool, List[int]]:
        """Check that every citation in ``text`` refers to a known document.

        Returns:
            ``(ok, invalid)`` where ``invalid`` lists, in order of
            appearance, every cited number outside
            ``1..len(self.documents)`` and ``ok`` is true when that list is
            empty.
        """
        upper = len(self.documents)
        invalid = [
            n for n in self.parse_citations_in_text(text or "")
            if not 1 <= n <= upper
        ]
        return (not invalid), invalid

    def get_statistics(self) -> Dict[str, Any]:
        """Summarize the registered documents.

        Returns:
            A dict with ``"total"`` (number of documents) and
            ``"source_type_counts"`` (count per ``"source_type"`` value;
            missing or falsy values are counted under ``"unknown"``).
        """
        counts: Dict[str, int] = {}
        for doc in self.documents:
            source_type = doc.get("source_type") or "unknown"
            counts[source_type] = counts.get(source_type, 0) + 1
        return {"total": len(self.documents), "source_type_counts": counts}