Spaces:
Running
Running
File size: 5,562 Bytes
2da1e29 a63c61f 2da1e29 a63c61f 2da1e29 a63c61f 2da1e29 a63c61f 2da1e29 a63c61f 2da1e29 a63c61f | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 | from fastapi import APIRouter, Query, HTTPException, Depends, Response
from typing import Optional
from src.core.ports.vector_store_port import VectorStorePort
from src.core.ports.embedder_port import EmbedderPort
from src.core.use_cases.analytics_use_case import AnalyticsUseCase
from src.api.dependencies import get_vector_store_port, get_embedder_port, get_analytics_use_case
from src.core.domain.schemas import BrowseResponse, SearchResponse, NewsArticle
# Router instance that every endpoint in this module registers on via @router.get.
router = APIRouter()
def _dict_to_article(payload: dict, score: Optional[float] = None) -> NewsArticle:
    """Build a ``NewsArticle`` from a stored payload dictionary.

    Args:
        payload: Stored fields for one document. ``None`` is treated as an
            empty dict.
        score: Optional score, copied onto the article unchanged.

    Returns:
        A ``NewsArticle`` populated from ``payload``. A missing or ``None``
        ``doc_id`` becomes ``"unknown"``; the body text is taken from
        ``"text"``, then ``"content"``, then ``""``.
    """
    payload = payload or {}

    # Anything that is not a dict (including None) is treated as "no metadata".
    metadata = payload.get("metadata")
    if not isinstance(metadata, dict):
        metadata = {}

    # Title can be stored at top-level payload OR nested inside metadata.
    title = payload.get("title") or metadata.get("title")

    # dict.get() defaults only apply when the key is absent, so a key stored
    # with an explicit None would otherwise leak None into the article.
    doc_id = payload.get("doc_id")
    if doc_id is None:
        doc_id = "unknown"

    body = payload.get("text")
    if body is None:
        body = payload.get("content")
    if body is None:
        body = ""

    return NewsArticle(
        doc_id=doc_id,
        url=payload.get("url"),
        title=title,
        content=body,
        source=payload.get("source"),
        published_at=payload.get("published_at"),
        score=score,
        metadata=metadata,
    )
@router.get("/latest", response_model=BrowseResponse)
def get_latest_news(
    response: Response,
    limit: int = Query(10, le=50),
    source: Optional[str] = None,
    language: Optional[str] = None,
    days_back: int = Query(7, ge=1, le=30, description="Number of days to look back (default: 7)"),
    vector_store: VectorStorePort = Depends(get_vector_store_port)
):
    """Get latest news articles sorted by publication date (default: last 7 days)"""
    # NOTE: the docstring above is left as-is because FastAPI publishes it as
    # the operation description; further documentation lives in comments.
    #
    # Always browses from offset 0 with the given filters. Any failure is
    # logged and answered with an empty BrowseResponse instead of an error.
    import logging

    # Prevent caching of news results - always fetch fresh data
    response.headers["Cache-Control"] = "no-cache, no-store, must-revalidate, max-age=0"
    response.headers["Pragma"] = "no-cache"
    response.headers["Expires"] = "0"

    try:
        result = vector_store.browse(
            limit=limit,
            offset=0,
            source=source,
            language=language,
            days_back=days_back,
        )

        # Convert the returned points to articles; ordering is left to the store.
        articles = [
            _dict_to_article(point.payload or {}, getattr(point, "score", None))
            for point in result["articles"]
        ]

        # Ensure next_offset is an integer or None.
        next_offset = result.get("next_offset")
        if not isinstance(next_offset, int):
            next_offset = None

        return BrowseResponse(
            total_returned=len(articles),
            articles=articles,
            next_offset=next_offset,
        )
    except Exception as exc:
        # Deliberate best-effort: return an empty response on error instead of
        # a 500, but keep the traceback in the logs so failures stay visible.
        logging.getLogger(__name__).exception("Error fetching news: %s", exc)
        return BrowseResponse(
            total_returned=0,
            articles=[],
            next_offset=None,
        )
@router.get("/browse", response_model=BrowseResponse)
def browse_news(
    limit: int = Query(20, le=100),
    offset: int = 0,
    source: Optional[str] = None,
    language: Optional[str] = None,
    vector_store: VectorStorePort = Depends(get_vector_store_port)
):
    # Pages through stored articles with optional source/language filters and
    # returns them together with the offset for the next page.
    # (Kept as comments: a handler docstring would be published by FastAPI as
    # the operation description.)
    result = vector_store.browse(limit=limit, offset=offset, source=source, language=language)

    # Each returned point exposes its stored fields on .payload; a score is
    # copied only when the point has one.
    articles = [
        _dict_to_article(point.payload or {}, getattr(point, "score", None))
        for point in result["articles"]
    ]

    # Same rule as the /latest endpoint: only an integer offset is passed on,
    # since `offset` above only accepts an int. A missing key or any other
    # type becomes None.
    next_offset = result.get("next_offset")
    if not isinstance(next_offset, int):
        next_offset = None

    return BrowseResponse(
        total_returned=len(articles),
        articles=articles,
        next_offset=next_offset,
    )
@router.get("/search", response_model=SearchResponse)
def search_news(
    q: str = Query(..., min_length=1),
    top_k: int = Query(10, le=50),
    source: Optional[str] = None,
    language: Optional[str] = None,
    embedder: EmbedderPort = Depends(get_embedder_port),
    vector_store: VectorStorePort = Depends(get_vector_store_port)
):
    # Encodes the query text, searches the vector store with the optional
    # source/language filters and returns the hits as articles.
    # Unexpected failures are reported as HTTP 500 with the error text.
    # (Kept as comments: a handler docstring would be published by FastAPI as
    # the operation description.)
    try:
        query_vector = embedder.encode_query(q)
        results = vector_store.search(
            query_vectors=query_vector,
            limit=top_k,
            source_filter=source,
            language_filter=language,
        )

        # VectorStorePort.search returns SearchResult objects
        # (content, metadata, score, doc_id).
        articles = []
        for hit in results:
            # Work on a copy so the store's result object is never mutated.
            payload = dict(hit.metadata or {})

            # The metadata may not repeat fields carried on the result itself;
            # fall back to them so the article is not "unknown"/empty.
            # NOTE(review): assumes hit.doc_id / hit.content mirror the stored
            # document - confirm against the SearchResult definition.
            if payload.get("doc_id") is None:
                fallback_id = getattr(hit, "doc_id", None)
                if fallback_id is not None:
                    payload["doc_id"] = fallback_id
            if payload.get("text") is None and payload.get("content") is None:
                fallback_text = getattr(hit, "content", None)
                if fallback_text is not None:
                    payload["text"] = fallback_text

            articles.append(_dict_to_article(payload, hit.score))

        return SearchResponse(results=articles)
    except HTTPException:
        # Do not re-wrap an HTTP error raised further down as a generic 500.
        raise
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc)) from exc
@router.get("/sources")
def get_sources(analytics: AnalyticsUseCase = Depends(get_analytics_use_case)):
    # Lists the non-empty source names found in sentiment_results, ordered by
    # article count (highest first). Returns an empty list when the query
    # yields no result or no rows.
    sql = "SELECT source, count() as article_count FROM sentiment_results GROUP BY source ORDER BY article_count DESC"
    outcome = analytics.execute_raw_query(sql)

    rows = outcome.get("rows") if outcome else None
    if not rows:
        return {"sources": []}

    # The first column of each row is the source name; skip empty values.
    names = [record[0] for record in rows if record[0]]
    return {"sources": names}
@router.get("/{doc_id}", response_model=NewsArticle)
def get_news_article(doc_id: str, vector_store: VectorStorePort = Depends(get_vector_store_port)):
    # Looks up one article by its document id; answers 404 when the store
    # returns nothing for that id.
    # (Kept as comments: a handler docstring would be published by FastAPI as
    # the operation description.)
    result = vector_store.get_by_doc_id(doc_id)
    if not result:
        raise HTTPException(status_code=404, detail="Article not found")

    # Work on a copy so the store's result object is never mutated.
    payload = dict(result.metadata or {})

    # The article was fetched by this id, so never report it as "unknown"
    # just because the metadata does not repeat the id.
    if payload.get("doc_id") is None:
        payload["doc_id"] = doc_id

    # NOTE(review): assumes result.content, when present, is the document
    # text - confirm against the result type returned by get_by_doc_id.
    if payload.get("text") is None and payload.get("content") is None:
        fallback_text = getattr(result, "content", None)
        if fallback_text is not None:
            payload["text"] = fallback_text

    # A lookup by id may not carry a score; treat a missing attribute as None.
    return _dict_to_article(payload, getattr(result, "score", None))
|