Spaces:
Sleeping
Sleeping
| from __future__ import annotations | |
| """Citation management for source tracking.""" | |
| from dataclasses import dataclass, field | |
| from typing import Any | |
| from urllib.parse import urlparse | |
@dataclass
class Citation:
    """A source citation.

    Declared as a dataclass so it can be built with keyword arguments
    (``Citation(title=..., url=...)``) and gets field-wise ``__eq__`` and
    ``__repr__``.

    Attributes:
        title: Source title.
        url: Source URL.
        snippet: Relevant text snippet from the source; empty by default.
        accessed_at: Access time of the source; empty by default. The
            string format is not fixed by this class (presumably a
            timestamp -- confirm with callers).
        reliability_score: Reliability estimate for the source; defaults
            to 0.5.
    """

    title: str
    url: str
    snippet: str = ""
    accessed_at: str = ""
    reliability_score: float = 0.5
class CitationManager:
    """Manages citations and source tracking.

    Citations are kept in insertion order and addressed by a 1-based index.
    A URL is stored at most once: adding the same URL again returns the
    index it was given the first time.
    """

    # Host labels that mark a high-reliability registry when they are the
    # final label ("nasa.gov") or sit directly before a two-letter country
    # code ("www.gov.uk", "unimelb.edu.au").
    _TRUSTED_SUFFIX_LABELS = frozenset({"gov", "edu"})

    # Registrable domains scored as high reliability (0.9).
    _HIGH_RELIABILITY_DOMAINS = (
        "wikipedia.org",
        "bbc.com",
        "reuters.com",
        "apnews.com",
        "nature.com",
        "sciencedirect.com",
        "pubmed.gov",
        "nytimes.com",
        "wsj.com",
        "economist.com",
    )

    # Registrable domains scored as medium reliability (0.7).
    _MEDIUM_RELIABILITY_DOMAINS = (
        "medium.com",
        "techcrunch.com",
        "wired.com",
        "arstechnica.com",
        "theverge.com",
        "forbes.com",
        "bloomberg.com",
    )

    def __init__(self):
        """Initialize the citation manager."""
        self._citations: list[Citation] = []
        # Maps URL -> 1-based citation index, used to de-duplicate sources.
        self._url_index: dict[str, int] = {}

    def add_citation(
        self,
        title: str,
        url: str,
        snippet: str = "",
    ) -> int:
        """Add a citation and return its index.

        If ``url`` was added before, nothing is stored and the existing
        index is returned (the new ``title``/``snippet`` are ignored).

        Args:
            title: Source title
            url: Source URL
            snippet: Relevant text snippet

        Returns:
            Citation index (1-based)
        """
        existing = self._url_index.get(url)
        if existing is not None:
            return existing

        citation = Citation(
            title=title,
            url=url,
            snippet=snippet,
            reliability_score=self._assess_reliability(url),
        )
        self._citations.append(citation)
        # Indices are 1-based, so the new index equals the new length.
        index = len(self._citations)
        self._url_index[url] = index
        return index

    def get_citation(self, index: int) -> Citation | None:
        """Get a citation by index.

        Args:
            index: Citation index (1-based)

        Returns:
            Citation or None if not found
        """
        if 1 <= index <= len(self._citations):
            return self._citations[index - 1]
        return None

    def get_all_citations(self) -> list[Citation]:
        """Get all citations.

        Returns:
            A new list holding all citations in insertion order
        """
        return list(self._citations)

    def format_inline(self, index: int) -> str:
        """Format an inline citation reference.

        Args:
            index: Citation index

        Returns:
            Formatted inline citation [n]
        """
        return f"[{index}]"

    def format_bibliography(self, style: str = "markdown") -> str:
        """Format all citations as a bibliography.

        Args:
            style: Output style (markdown, plain, html). Any other value
                produces an empty string.

        Returns:
            Formatted bibliography, or an empty string when there are no
            citations. For the html style, titles and URLs are
            HTML-escaped.
        """
        if not self._citations:
            return ""

        lines = []
        if style == "markdown":
            lines.append("**Sources:**")
            for i, cite in enumerate(self._citations, 1):
                lines.append(f"[{i}] [{cite.title}]({cite.url})")
        elif style == "plain":
            lines.append("Sources:")
            for i, cite in enumerate(self._citations, 1):
                lines.append(f"{i}. {cite.title}")
                lines.append(f" {cite.url}")
        elif style == "html":
            # Local import keeps this block self-contained.
            from html import escape

            lines.append("<div class='sources'>")
            lines.append("<h4>Sources:</h4>")
            lines.append("<ol>")
            for cite in self._citations:
                # Titles and URLs come from external sources; escape them
                # so they cannot break out of the attribute or inject
                # markup. NOTE(review): the URL scheme is not validated
                # here (e.g. "javascript:" links are not filtered).
                href = escape(str(cite.url), quote=True)
                label = escape(str(cite.title), quote=True)
                lines.append(
                    f'<li><a href="{href}" target="_blank">{label}</a></li>'
                )
            lines.append("</ol>")
            lines.append("</div>")
        return "\n".join(lines)

    def to_dict_list(self) -> list[dict[str, Any]]:
        """Convert citations to list of dictionaries.

        Returns:
            List of citation dictionaries with the keys ``title``, ``url``,
            ``snippet`` and ``reliability_score``
        """
        return [
            {
                "title": cite.title,
                "url": cite.url,
                "snippet": cite.snippet,
                "reliability_score": cite.reliability_score,
            }
            for cite in self._citations
        ]

    def clear(self) -> None:
        """Clear all citations and the URL index."""
        self._citations.clear()
        self._url_index.clear()

    @staticmethod
    def _host_in_domains(host: str, domains: tuple) -> bool:
        """Return True if ``host`` equals, or is a subdomain of, any domain."""
        return any(
            host == domain or host.endswith("." + domain) for domain in domains
        )

    def _assess_reliability(self, url: str) -> float:
        """Assess reliability of a source based on URL.

        Matching is done on whole host labels, never on raw substrings, so
        look-alike hosts such as ``notbbc.com``, ``bbc.com.evil.org`` or
        ``www.educationscam.com`` do not inherit a trusted score.

        Args:
            url: Source URL

        Returns:
            Reliability score (0.0 - 1.0): 0.9 for high-reliability hosts,
            0.7 for medium-reliability hosts, 0.5 for anything else, and
            0.3 when the URL cannot be parsed.
        """
        try:
            # ``hostname`` is lower-cased and excludes port and userinfo.
            host = urlparse(url).hostname
        except (ValueError, AttributeError, TypeError):
            # ValueError: malformed URL (e.g. unbalanced IPv6 brackets);
            # AttributeError/TypeError: ``url`` is not string-like.
            return 0.3

        if not isinstance(host, str):
            # No network location (e.g. scheme-less input) or bytes input.
            host = ""
        host = host.rstrip(".")  # tolerate fully-qualified "example.com."

        labels = host.split(".")
        if len(labels) >= 2:
            trusted = self._TRUSTED_SUFFIX_LABELS
            is_trusted_tld = labels[-1] in trusted
            # e.g. "gov.uk" / "edu.au": trusted label followed by a
            # two-letter country code.
            is_trusted_cc_sld = labels[-2] in trusted and len(labels[-1]) == 2
            if is_trusted_tld or is_trusted_cc_sld:
                return 0.9

        if self._host_in_domains(host, self._HIGH_RELIABILITY_DOMAINS):
            return 0.9
        if self._host_in_domains(host, self._MEDIUM_RELIABILITY_DOMAINS):
            return 0.7

        # Default moderate reliability for unknown sources
        return 0.5

    def get_most_reliable(self, n: int = 3) -> list[Citation]:
        """Get the n most reliable citations.

        Args:
            n: Number of citations to return; values <= 0 yield an empty
                list

        Returns:
            List of most reliable citations, highest score first. Ties keep
            insertion order.
        """
        if n <= 0:
            # A negative n would otherwise slice "all but the last |n|".
            return []
        ranked = sorted(
            self._citations,
            key=lambda c: c.reliability_score,
            reverse=True,
        )
        return ranked[:n]