import logging
from typing import Iterable, List, Optional

from fastapi import APIRouter, Query, HTTPException, Depends, Response

from src.core.ports.vector_store_port import VectorStorePort
from src.core.ports.embedder_port import EmbedderPort
from src.core.use_cases.analytics_use_case import AnalyticsUseCase
from src.api.dependencies import (
    get_vector_store_port,
    get_embedder_port,
    get_analytics_use_case,
)
from src.core.domain.schemas import BrowseResponse, SearchResponse, NewsArticle

logger = logging.getLogger(__name__)

router = APIRouter()


def _dict_to_article(
    payload: Optional[dict], score: Optional[float] = None
) -> NewsArticle:
    """Build a ``NewsArticle`` from a stored payload dict.

    Args:
        payload: Payload dict read from the store; ``None`` is treated as
            an empty dict.
        score: Optional score to attach to the article.

    Returns:
        A ``NewsArticle``. ``doc_id`` falls back to ``"unknown"`` and
        ``content`` to ``""`` when the payload does not carry them.
    """
    if payload is None:
        payload = {}

    # Anything that is not a dict (including a missing/None value) is
    # treated as "no metadata".
    metadata = payload.get("metadata") if payload else {}
    if not isinstance(metadata, dict):
        metadata = {}

    # Title can be stored at top-level payload OR nested inside metadata.
    title = payload.get("title") or metadata.get("title")

    return NewsArticle(
        doc_id=payload.get("doc_id", "unknown"),
        url=payload.get("url"),
        title=title,
        # "text" wins over "content" whenever the "text" key is present.
        content=payload.get("text", payload.get("content", "")),
        source=payload.get("source"),
        published_at=payload.get("published_at"),
        score=score,
        metadata=metadata,
    )


def _points_to_articles(points: Iterable) -> List[NewsArticle]:
    """Convert store points (objects exposing ``.payload``) into articles."""
    return [
        # Not every point carries a score, hence the getattr default.
        _dict_to_article(point.payload or {}, getattr(point, "score", None))
        for point in points
    ]


@router.get("/latest", response_model=BrowseResponse)
def get_latest_news(
    response: Response,
    limit: int = Query(10, ge=1, le=50),
    source: Optional[str] = None,
    language: Optional[str] = None,
    days_back: int = Query(
        7, ge=1, le=30, description="Number of days to look back (default: 7)"
    ),
    vector_store: VectorStorePort = Depends(get_vector_store_port),
):
    """Get latest news articles sorted by publication date (default: last 7 days)

    Any failure while fetching or converting results is logged and an empty
    ``BrowseResponse`` is returned instead of an HTTP 500.
    """
    # Prevent caching of news results - always fetch fresh data
    response.headers["Cache-Control"] = (
        "no-cache, no-store, must-revalidate, max-age=0"
    )
    response.headers["Pragma"] = "no-cache"
    response.headers["Expires"] = "0"

    try:
        result = vector_store.browse(
            limit=limit,
            offset=0,
            source=source,
            language=language,
            days_back=days_back,
        )

        # Points are used in the order returned by the store.
        articles = _points_to_articles(result["articles"])

        # Only an integer offset is passed on; anything else becomes None.
        next_offset = result.get("next_offset")
        if not isinstance(next_offset, int):
            next_offset = None

        return BrowseResponse(
            total_returned=len(articles),
            articles=articles,
            next_offset=next_offset,
        )
    except Exception:
        # Deliberate best-effort: return an empty response on error instead
        # of a 500, but keep the traceback in the logs.
        logger.exception("Error fetching news")
        return BrowseResponse(total_returned=0, articles=[], next_offset=None)


@router.get("/browse", response_model=BrowseResponse)
def browse_news(
    limit: int = Query(20, ge=1, le=100),
    offset: int = Query(0, ge=0),
    source: Optional[str] = None,
    language: Optional[str] = None,
    vector_store: VectorStorePort = Depends(get_vector_store_port),
):
    """Browse stored articles page by page, optionally filtered by source/language.

    Errors raised by the vector store are not caught here.
    """
    result = vector_store.browse(
        limit=limit, offset=offset, source=source, language=language
    )

    # The store returns points with a .payload attribute.
    articles = _points_to_articles(result["articles"])

    return BrowseResponse(
        total_returned=len(articles),
        articles=articles,
        next_offset=result["next_offset"],
    )


@router.get("/search", response_model=SearchResponse)
def search_news(
    q: str = Query(..., min_length=1),
    top_k: int = Query(10, ge=1, le=50),
    source: Optional[str] = None,
    language: Optional[str] = None,
    embedder: EmbedderPort = Depends(get_embedder_port),
    vector_store: VectorStorePort = Depends(get_vector_store_port),
):
    """Semantic search: embed the query text and return the closest articles.

    Any failure is logged and surfaced as an HTTP 500 whose detail is the
    exception message.
    """
    try:
        query_vector = embedder.encode_query(q)
        results = vector_store.search(
            query_vectors=query_vector,
            limit=top_k,
            source_filter=source,
            language_filter=language,
        )

        # VectorStorePort.search returns SearchResult objects
        # (content, metadata, score, doc_id); the article is built from
        # the result's metadata dict and score.
        articles = [_dict_to_article(r.metadata, r.score) for r in results]

        return SearchResponse(results=articles)
    except Exception as e:
        logger.exception("Error searching news")
        # Chain the cause so the original traceback is preserved.
        raise HTTPException(status_code=500, detail=str(e)) from e
@router.get("/sources")
def get_sources(analytics: AnalyticsUseCase = Depends(get_analytics_use_case)):
    """List the sources found in ``sentiment_results``, most articles first.

    Returns ``{"sources": [...]}``; the list is empty when the query yields
    no rows. Rows whose source value is empty are skipped.
    """
    # Static SQL with no user-supplied parts.
    query = (
        "SELECT source, count() as article_count "
        "FROM sentiment_results "
        "GROUP BY source "
        "ORDER BY article_count DESC"
    )
    res = analytics.execute_raw_query(query)

    rows = res.get("rows") if res else None
    if not rows:
        return {"sources": []}

    # Only the first column (source) is exposed; the count is used for
    # ordering only.
    return {"sources": [row[0] for row in rows if row[0]]}


# Catch-all path parameter route: it is declared after the fixed paths in
# this module so that routes such as "/sources" are matched first.
@router.get("/{doc_id}", response_model=NewsArticle)
def get_news_article(
    doc_id: str,
    vector_store: VectorStorePort = Depends(get_vector_store_port),
):
    """Return a single article by its document id.

    Raises:
        HTTPException: 404 when the store has no entry for ``doc_id``.
    """
    result = vector_store.get_by_doc_id(doc_id)
    if not result:
        raise HTTPException(status_code=404, detail="Article not found")

    # Copy the metadata so the store's object is not mutated, and fall back
    # to the requested id when the metadata itself does not carry one
    # (otherwise the article would be reported with doc_id "unknown").
    payload = dict(result.metadata or {})
    payload.setdefault("doc_id", doc_id)

    return _dict_to_article(payload, result.score)