Spaces:
Running
Running
| from fastapi import APIRouter, Query, HTTPException, Depends, Response | |
| from typing import Optional | |
| from src.core.ports.vector_store_port import VectorStorePort | |
| from src.core.ports.embedder_port import EmbedderPort | |
| from src.core.use_cases.analytics_use_case import AnalyticsUseCase | |
| from src.api.dependencies import get_vector_store_port, get_embedder_port, get_analytics_use_case | |
| from src.core.domain.schemas import BrowseResponse, SearchResponse, NewsArticle | |
| router = APIRouter() | |
def _dict_to_article(payload: dict, score: Optional[float] = None) -> NewsArticle:
    """Build a ``NewsArticle`` from a stored payload dictionary.

    Args:
        payload: Mapping of stored article fields. ``None`` is treated as an
            empty payload.
        score: Optional relevance score, forwarded to the article unchanged.

    Returns:
        A ``NewsArticle`` populated from ``payload``. ``doc_id`` defaults to
        ``"unknown"`` when the key is absent, ``content`` defaults to ``""``,
        and ``metadata`` is always a dict.
    """
    if payload is None:
        payload = {}

    # Normalise metadata once so every later access can assume a dict.
    metadata = payload.get("metadata")
    if not isinstance(metadata, dict):
        metadata = {}

    # Title can be stored at top-level payload OR nested inside metadata.
    title = payload.get("title") or metadata.get("title")

    # "text" takes precedence over "content". A key that is present but holds
    # None must not win over the fallback, otherwise content would be None.
    content = payload.get("text")
    if content is None:
        content = payload.get("content")
    if content is None:
        content = ""

    return NewsArticle(
        doc_id=payload.get("doc_id", "unknown"),
        url=payload.get("url"),
        title=title,
        content=content,
        source=payload.get("source"),
        published_at=payload.get("published_at"),
        score=score,
        metadata=metadata,
    )
def get_latest_news(
    response: Response,
    limit: int = Query(10, le=50),
    source: Optional[str] = None,
    language: Optional[str] = None,
    days_back: int = Query(7, ge=1, le=30, description="Number of days to look back (default: 7)"),
    vector_store: VectorStorePort = Depends(get_vector_store_port),
):
    """Get the latest news articles from the last ``days_back`` days.

    Args:
        response: Outgoing response; cache-disabling headers are set on it.
        limit: Maximum number of articles to request (at most 50).
        source: Optional source filter passed to the vector store.
        language: Optional language filter passed to the vector store.
        days_back: Look-back window in days (1-30, default 7).
        vector_store: Store queried through ``browse``.

    Returns:
        A ``BrowseResponse`` with the converted articles, in the order the
        store returned them. On any error an empty ``BrowseResponse`` is
        returned instead of propagating the exception.
    """
    # Function-scope import keeps this block self-contained.
    import logging

    # Prevent caching of news results - always fetch fresh data.
    response.headers["Cache-Control"] = "no-cache, no-store, must-revalidate, max-age=0"
    response.headers["Pragma"] = "no-cache"
    response.headers["Expires"] = "0"

    try:
        result = vector_store.browse(
            limit=limit,
            offset=0,
            source=source,
            language=language,
            days_back=days_back,
        )

        # Convert returned points to articles; ordering is left to the store.
        articles = [
            _dict_to_article(point.payload or {}, getattr(point, "score", None))
            for point in result["articles"]
        ]

        # Only an integer offset is forwarded; anything else becomes None.
        next_offset = result.get("next_offset")
        if not isinstance(next_offset, int):
            next_offset = None

        return BrowseResponse(
            total_returned=len(articles),
            articles=articles,
            next_offset=next_offset,
        )
    except Exception:
        # Deliberate best-effort: answer with an empty page instead of a 500,
        # but record the full traceback so the failure is not silent.
        logging.getLogger(__name__).exception("Error fetching news")
        return BrowseResponse(
            total_returned=0,
            articles=[],
            next_offset=None,
        )
def browse_news(
    limit: int = Query(20, le=100),
    offset: int = 0,
    source: Optional[str] = None,
    language: Optional[str] = None,
    vector_store: VectorStorePort = Depends(get_vector_store_port),
):
    """Return one page of stored news articles.

    Args:
        limit: Maximum number of articles to request (at most 100).
        offset: Offset passed to the vector store's ``browse``.
        source: Optional source filter passed to the vector store.
        language: Optional language filter passed to the vector store.
        vector_store: Store queried through ``browse``.

    Returns:
        A ``BrowseResponse`` holding the converted articles and the offset
        for the next page (``None`` when the store gives no integer offset).
    """
    result = vector_store.browse(limit=limit, offset=offset, source=source, language=language)

    # Returned points expose their stored fields on ``.payload``.
    articles = [
        _dict_to_article(point.payload or {}, getattr(point, "score", None))
        for point in result["articles"]
    ]

    # Same rule as get_latest_news: only an integer offset is forwarded.
    # NOTE(review): assumes BrowseResponse.next_offset expects int or None - confirm.
    next_offset = result.get("next_offset")
    if not isinstance(next_offset, int):
        next_offset = None

    return BrowseResponse(
        total_returned=len(articles),
        articles=articles,
        next_offset=next_offset,
    )
def search_news(
    q: str = Query(..., min_length=1),
    top_k: int = Query(10, le=50),
    source: Optional[str] = None,
    language: Optional[str] = None,
    embedder: EmbedderPort = Depends(get_embedder_port),
    vector_store: VectorStorePort = Depends(get_vector_store_port),
):
    """Search for news articles matching a free-text query.

    Args:
        q: Query text (at least one character); encoded with the embedder.
        top_k: Maximum number of results to request (at most 50).
        source: Optional source filter passed to the vector store.
        language: Optional language filter passed to the vector store.
        embedder: Encoder used to turn ``q`` into query vector(s).
        vector_store: Store queried through ``search``.

    Returns:
        A ``SearchResponse`` whose ``results`` are the converted hits.

    Raises:
        HTTPException: Status 500 with the error text as detail when
            encoding, searching, or conversion fails.
    """
    try:
        query_vector = embedder.encode_query(q)
        hits = vector_store.search(
            query_vectors=query_vector,
            limit=top_k,
            source_filter=source,
            language_filter=language,
        )
        # Each hit's ``metadata`` is used as the article payload and its
        # ``score`` as the relevance score.
        articles = [_dict_to_article(hit.metadata, hit.score) for hit in hits]
        return SearchResponse(results=articles)
    except Exception as e:
        # Chain explicitly so the original failure stays attached as the cause.
        raise HTTPException(status_code=500, detail=str(e)) from e
def get_sources(analytics: AnalyticsUseCase = Depends(get_analytics_use_case)):
    """List the distinct article sources, most frequent first.

    Runs a grouped count over ``sentiment_results`` through the analytics
    use case and returns the non-empty source names in query order.

    Returns:
        ``{"sources": [...]}``; the list is empty when the query yields no
        result or no rows.
    """
    query = "SELECT source, count() as article_count FROM sentiment_results GROUP BY source ORDER BY article_count DESC"
    outcome = analytics.execute_raw_query(query)

    # Guard clause: nothing came back, or the result has no rows.
    if not outcome or not outcome.get("rows"):
        return {"sources": []}

    # First column of each row is the source name; skip empty values.
    names = []
    for record in outcome["rows"]:
        if record[0]:
            names.append(record[0])
    return {"sources": names}
def get_news_article(doc_id: str, vector_store: VectorStorePort = Depends(get_vector_store_port)):
    """Fetch a single article by its document id.

    Args:
        doc_id: Identifier passed to the store's ``get_by_doc_id``.
        vector_store: Store used for the lookup.

    Returns:
        The article built from the stored record's ``metadata`` and ``score``.

    Raises:
        HTTPException: Status 404 when the store returns nothing for ``doc_id``.
    """
    record = vector_store.get_by_doc_id(doc_id)
    if record:
        return _dict_to_article(record.metadata, record.score)
    raise HTTPException(status_code=404, detail="Article not found")