File size: 5,562 Bytes
2da1e29
a63c61f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2da1e29
a63c61f
 
 
2da1e29
a63c61f
 
2da1e29
 
 
 
 
 
a63c61f
2da1e29
 
 
 
 
 
 
a63c61f
2da1e29
a63c61f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
import logging
from typing import Optional

from fastapi import APIRouter, Query, HTTPException, Depends, Response

from src.api.dependencies import get_vector_store_port, get_embedder_port, get_analytics_use_case
from src.core.domain.schemas import BrowseResponse, SearchResponse, NewsArticle
from src.core.ports.embedder_port import EmbedderPort
from src.core.ports.vector_store_port import VectorStorePort
from src.core.use_cases.analytics_use_case import AnalyticsUseCase

# Router that the news endpoints in this module register themselves on
# through the @router.get(...) decorators.
router = APIRouter()

def _dict_to_article(payload: Optional[dict], score: Optional[float] = None) -> NewsArticle:
    """Build a ``NewsArticle`` from a stored payload dictionary.

    Args:
        payload: Stored document payload. ``None`` is treated as an empty
            payload.
        score: Optional score to attach to the article.

    Returns:
        A ``NewsArticle`` filled from ``payload``. ``doc_id`` defaults to
        ``"unknown"``, ``content`` to ``""`` and ``metadata`` to ``{}`` when
        the payload does not provide them.
    """
    if payload is None:
        payload = {}

    # Normalise metadata once: a missing, None or non-dict value becomes an
    # empty dict, so the lookups below need no further type checks.
    metadata = payload.get("metadata")
    if not isinstance(metadata, dict):
        metadata = {}

    # Title can be stored at top-level payload OR nested inside metadata.
    title = payload.get("title") or metadata.get("title")

    # Prefer "text", then "content". A key that is present but holds None is
    # treated like a missing key so the article never ends up with
    # content=None when a fallback (or the "" default) is available.
    content = payload.get("text")
    if content is None:
        content = payload.get("content")
    if content is None:
        content = ""

    return NewsArticle(
        doc_id=payload.get("doc_id", "unknown"),
        url=payload.get("url"),
        title=title,
        content=content,
        source=payload.get("source"),
        published_at=payload.get("published_at"),
        score=score,
        metadata=metadata,
    )

@router.get("/latest", response_model=BrowseResponse)
def get_latest_news(
    response: Response,
    limit: int = Query(10, le=50),
    source: Optional[str] = None,
    language: Optional[str] = None,
    days_back: int = Query(7, ge=1, le=30, description="Number of days to look back (default: 7)"),
    vector_store: VectorStorePort = Depends(get_vector_store_port)
):
    """Get latest news articles sorted by publication date (default: last 7 days)

    Results can be narrowed by ``source`` and ``language``. The response is
    marked as non-cacheable. If the lookup fails, an empty result is returned
    instead of an error status.
    """
    # Prevent caching of news results - always fetch fresh data
    response.headers["Cache-Control"] = "no-cache, no-store, must-revalidate, max-age=0"
    response.headers["Pragma"] = "no-cache"
    response.headers["Expires"] = "0"

    try:
        result = vector_store.browse(
            limit=limit,
            offset=0,
            source=source,
            language=language,
            days_back=days_back,
        )

        # Each returned point exposes .payload and may carry a .score.
        # Ordering is whatever vector_store.browse produced; nothing is
        # re-sorted here.
        articles = [
            _dict_to_article(point.payload or {}, getattr(point, "score", None))
            for point in result["articles"]
        ]

        # Only an integer offset is passed on; None or any other type
        # becomes None.
        next_offset = result.get("next_offset")
        if not isinstance(next_offset, int):
            next_offset = None

        return BrowseResponse(
            total_returned=len(articles),
            articles=articles,
            next_offset=next_offset,
        )
    except Exception:
        # Deliberate best-effort: answer with an empty page instead of a 500,
        # but keep the full traceback in the server log so the failure is
        # still diagnosable.
        logging.getLogger(__name__).exception("Error fetching news")
        return BrowseResponse(
            total_returned=0,
            articles=[],
            next_offset=None,
        )

@router.get("/browse", response_model=BrowseResponse)
def browse_news(
    limit: int = Query(20, le=100),
    offset: int = 0,
    source: Optional[str] = None,
    language: Optional[str] = None,
    vector_store: VectorStorePort = Depends(get_vector_store_port)
):
    """Browse stored news articles page by page.

    Results can be narrowed by ``source`` and ``language``. ``next_offset`` in
    the response is the offset for the following page, or null when the store
    does not report an integer offset.
    """
    result = vector_store.browse(limit=limit, offset=offset, source=source, language=language)

    # Each returned point exposes .payload and may carry a .score.
    articles = [
        _dict_to_article(point.payload or {}, getattr(point, "score", None))
        for point in result["articles"]
    ]

    # Same normalisation as the /latest endpoint: tolerate a missing key and
    # pass on only integer offsets (anything else becomes None).
    # NOTE(review): assumes BrowseResponse.next_offset is an optional int -
    # confirm against the schema.
    next_offset = result.get("next_offset")
    if not isinstance(next_offset, int):
        next_offset = None

    return BrowseResponse(
        total_returned=len(articles),
        articles=articles,
        next_offset=next_offset,
    )

@router.get("/search", response_model=SearchResponse)
def search_news(
    q: str = Query(..., min_length=1),
    top_k: int = Query(10, le=50),
    source: Optional[str] = None,
    language: Optional[str] = None,
    embedder: EmbedderPort = Depends(get_embedder_port),
    vector_store: VectorStorePort = Depends(get_vector_store_port)
):
    """Search news articles matching the query text ``q``.

    Returns at most ``top_k`` results, optionally narrowed by ``source`` and
    ``language``. Any failure while encoding the query or searching is
    reported as HTTP 500 with the error message as detail.
    """
    try:
        query_vector = embedder.encode_query(q)
        results = vector_store.search(
            query_vectors=query_vector,
            limit=top_k,
            source_filter=source,
            language_filter=language,
        )
        # VectorStorePort.search returns SearchResult objects (content, metadata, score, doc_id)
        articles = [_dict_to_article(hit.metadata, hit.score) for hit in results]
        return SearchResponse(results=articles)
    except Exception as exc:
        # Record the traceback server-side and chain the cause so the original
        # failure is not lost behind the HTTP error.
        logging.getLogger(__name__).exception("Error searching news")
        raise HTTPException(status_code=500, detail=str(exc)) from exc

@router.get("/sources")
def get_sources(analytics: AnalyticsUseCase = Depends(get_analytics_use_case)):
    # List the distinct article sources, ordered by descending article count.
    # Returns {"sources": [...]}; the list is empty when the query yields no
    # rows. Documented with comments rather than a docstring so the generated
    # OpenAPI description of this endpoint stays unchanged.
    sql = "SELECT source, count() as article_count FROM sentiment_results GROUP BY source ORDER BY article_count DESC"
    outcome = analytics.execute_raw_query(sql)

    names = []
    if outcome and outcome.get("rows"):
        for record in outcome["rows"]:
            # The first column is the source name; skip empty/None entries.
            if record[0]:
                names.append(record[0])
    return {"sources": names}

@router.get("/{doc_id}", response_model=NewsArticle)
def get_news_article(doc_id: str, vector_store: VectorStorePort = Depends(get_vector_store_port)):
    # Fetch a single article by its document id.
    # Responds with the article built from the stored record's metadata and
    # score, or raises a 404 HTTPException when the store returns nothing
    # for doc_id. Documented with comments rather than a docstring so the
    # generated OpenAPI description of this endpoint stays unchanged.
    record = vector_store.get_by_doc_id(doc_id)
    if record:
        return _dict_to_article(record.metadata, record.score)
    raise HTTPException(status_code=404, detail="Article not found")