| """ |
| Hugging Face Space entrypoint for FRED-VDB semantic search. |
| |
| This is a thin Gradio wrapper around fred_search.FREDSearcher. All the real |
| work lives in the package; this file only: |
| 1. Downloads the prebuilt 118 MB LanceDB vector index from a HF Dataset. |
| 2. Instantiates ONE FREDSearcher at module load (model + index loaded once). |
| 3. Exposes a search box + frequency dropdown + popularity-boost toggle. |
| |
| Search is fully offline at request time — no FRED API key is needed or used. |
| """ |
|
|
| from __future__ import annotations |
|
|
| import os |
| from pathlib import Path |
|
|
| import gradio as gr |
| from huggingface_hub import snapshot_download |
|
|
| from fred_search import FREDSearcher |
| from fred_search.models import FREDSearchResult |
|
|
| |
| |
| |
| |
| |
| |
| |
# Hub dataset repo that hosts the prebuilt vector index. Override it with the
# INDEX_DATASET environment variable.
INDEX_DATASET = os.environ.get("INDEX_DATASET", "Jacob235/fred-vector-index")

# Download the index snapshot once, at import time. The target directory
# defaults to /tmp/fred_index and can be redirected with INDEX_LOCAL_DIR.
_index_root = Path(
    snapshot_download(
        repo_type="dataset",
        repo_id=INDEX_DATASET,
        local_dir=os.environ.get("INDEX_LOCAL_DIR", "/tmp/fred_index"),
    )
)

# One module-level searcher shared by every request, so the model and index
# are loaded a single time rather than per query.
_searcher = FREDSearcher(data_dir=_index_root)

# Choices for the frequency dropdown. "Any" is a UI-only sentinel that the
# search handler turns into "no frequency filter".
FREQUENCIES = [
    "Any",
    "Daily",
    "Weekly",
    "Monthly",
    "Quarterly",
    "Annual",
]
|
|
|
|
def _render(
    results: list[FREDSearchResult],
    *,
    max_notes_chars: int | None = 300,
) -> str:
    """Format search results as Markdown for the Gradio output panel.

    Each result becomes a numbered level-3 heading whose series ID links to
    its FRED page, followed by a metadata line (similarity, frequency, units,
    popularity) and, when present, an excerpt of the series notes. Results are
    separated by horizontal rules.

    Args:
        results: Search hits, in the order they should be displayed.
        max_notes_chars: Longest notes excerpt shown per result. Longer notes
            are cut to this many characters and suffixed with an ellipsis.
            ``None`` disables truncation; negative values are treated as 0.

    Returns:
        The rendered Markdown, or an italic "no matches" hint when
        ``results`` is empty.
    """
    if not results:
        return "_No matching series found. Try rephrasing your query._"

    # Clamp so a negative limit cannot turn the slice below into
    # "drop characters from the end".
    limit = None if max_notes_chars is None else max(max_notes_chars, 0)

    blocks: list[str] = []
    for rank, hit in enumerate(results, start=1):
        url = f"https://fred.stlouisfed.org/series/{hit.series_id}"
        header = f"### {rank}. [`{hit.series_id}`]({url}) — {hit.title}"
        meta = " · ".join(
            (
                f"**Similarity:** {hit.similarity_score:.3f}",
                f"**Frequency:** {hit.frequency}",
                f"**Units:** {hit.units}",
                f"**Popularity:** {hit.popularity}",
            )
        )

        # Notes may be missing (None) or padded with whitespace.
        notes = (hit.notes or "").strip()
        if limit is not None and len(notes) > limit:
            notes = notes[:limit].rstrip() + "…"

        # Skip empty parts so a result without notes has no trailing gap.
        blocks.append("\n\n".join(part for part in (header, meta, notes) if part))

    return "\n\n---\n\n".join(blocks)
|
|
|
|
def search(query: str, frequency: str, popularity_boost: bool) -> str:
    """Handle a Gradio search request and return Markdown for display.

    Args:
        query: Free-text description of the data being sought. ``None`` or
            whitespace-only input short-circuits with a prompt to type
            something.
        frequency: Selected dropdown value; ``"Any"`` means no frequency
            filter is applied.
        popularity_boost: Passed through to the searcher unchanged.

    Returns:
        Markdown produced by ``_render`` for the top 10 hits, or an italic
        hint when the query is empty.
    """
    cleaned = query.strip() if query else ""
    if not cleaned:
        return "_Enter a natural-language description of the data you want._"

    # The "Any" sentinel exists only in the UI; the searcher expects None.
    if frequency == "Any":
        frequency_filter = None
    else:
        frequency_filter = frequency

    hits = _searcher.search(
        cleaned,
        top_k=10,
        frequency=frequency_filter,
        popularity_boost=popularity_boost,
    )
    return _render(hits)
|
|
|
|
# Example rows for gr.Examples, in the order (query, frequency, popularity_boost).
EXAMPLES = [
    ["indicators of commercial real estate credit stress", "Any", True],
    ["inflation expectations vs realized inflation", "Monthly", True],
    ["risk-free rate benchmarks at various maturities", "Daily", True],
    ["housing supply pipeline for multifamily", "Any", True],
]

# Heading and blurb shown at the top of the page.
_INTRO_MARKDOWN = (
    "# FRED-VDB\n"
    "Semantic search over 33,000+ curated FRED economic series. Describe the "
    "data you want in plain language — matching is by *meaning*, not keywords."
)

# Description attached to the `search` API endpoint.
_SEARCH_API_DESCRIPTION = (
    "Semantic search over 33,000+ curated FRED (Federal Reserve Economic "
    "Data) time series — a high-signal subset filtered from FRED's full "
    "~840K-series catalog. Given a natural-language description of the data you want "
    "(e.g. 'labor market slack' or 'commercial real estate credit stress'), "
    "returns the top 10 matching series ranked by meaning (not keywords), "
    "formatted as Markdown. Each result includes the FRED series_id "
    "(e.g. UNRATE), title, similarity score, frequency, units, and popularity. "
    "Use the series_id to fetch the actual data via the FRED API or at "
    "https://fred.stlouisfed.org/series/{series_id}. Optional args: filter by "
    "`frequency` (Any/Daily/Weekly/Monthly/Quarterly/Annual) and toggle "
    "`popularity_boost` to favor well-known headline series. The ranking "
    "is a similarity heuristic, not an authoritative answer — the best "
    "match for your intent is not always result #1. Read the titles and "
    "metadata across the top 10 and use your own judgment to pick the "
    "series that actually fits the request (or refine the query and "
    "search again)."
)

with gr.Blocks(title="FRED-VDB — Semantic Search over FRED") as demo:
    gr.Markdown(_INTRO_MARKDOWN)

    # NOTE(review): Row membership is inferred from the `scale=` arguments
    # (only the textbox and dropdown carry one) — confirm against the
    # intended layout.
    with gr.Row():
        query_box = gr.Textbox(
            label="What are you looking for?",
            placeholder="e.g. indicators of labor market slack",
            scale=4,
        )
        freq_dropdown = gr.Dropdown(
            FREQUENCIES,
            value="Any",
            label="Frequency",
            scale=1,
        )

    boost_toggle = gr.Checkbox(
        value=True,
        label="Boost well-known series (popularity re-ranking)",
        info="On: surfaces headline series like UNRATE/DGS10. Off: pure similarity.",
    )
    search_btn = gr.Button("Search", variant="primary")
    output = gr.Markdown()

    # The same three components feed the examples and both event handlers.
    search_inputs = [query_box, freq_dropdown, boost_toggle]

    gr.Examples(EXAMPLES, inputs=search_inputs)

    # The button click is the handler registered under the `search` API name
    # and carries the description.
    search_btn.click(
        fn=search,
        inputs=search_inputs,
        outputs=output,
        api_name="search",
        api_description=_SEARCH_API_DESCRIPTION,
    )

    # Pressing Enter in the textbox runs the same handler; it is marked
    # private, presumably so the API lists a single `search` endpoint.
    query_box.submit(
        fn=search,
        inputs=search_inputs,
        outputs=output,
        api_visibility="private",
    )


if __name__ == "__main__":
    demo.launch()
|
|