Madras1 commited on
Commit
dc2d570
·
verified ·
1 Parent(s): 3b5c441

Upload 53 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .env.example +11 -0
  2. .gitattributes +5 -0
  3. .gitignore +45 -0
  4. .venv/.gitignore +1 -0
  5. .venv/bin/Activate.ps1 +247 -0
  6. .venv/bin/activate +63 -0
  7. .venv/bin/python.exe +3 -0
  8. .venv/bin/python3.11.exe +3 -0
  9. .venv/bin/python3.exe +3 -0
  10. .venv/bin/python3w.exe +3 -0
  11. .venv/bin/pythonw.exe +3 -0
  12. .venv/pyvenv.cfg +5 -0
  13. Dockerfile +21 -0
  14. README.md +54 -11
  15. app/__init__.py +3 -0
  16. app/__pycache__/__init__.cpython-311.pyc +0 -0
  17. app/__pycache__/config.cpython-311.pyc +0 -0
  18. app/__pycache__/main.cpython-311.pyc +0 -0
  19. app/agents/__init__.py +1 -0
  20. app/agents/__pycache__/__init__.cpython-311.pyc +0 -0
  21. app/agents/__pycache__/llm_client.cpython-311.pyc +0 -0
  22. app/agents/__pycache__/synthesizer.cpython-311.pyc +0 -0
  23. app/agents/llm_client.py +105 -0
  24. app/agents/synthesizer.py +127 -0
  25. app/api/__init__.py +1 -0
  26. app/api/__pycache__/__init__.cpython-311.pyc +0 -0
  27. app/api/__pycache__/schemas.cpython-311.pyc +0 -0
  28. app/api/routes/__init__.py +1 -0
  29. app/api/routes/__pycache__/__init__.cpython-311.pyc +0 -0
  30. app/api/routes/__pycache__/search.cpython-311.pyc +0 -0
  31. app/api/routes/search.py +146 -0
  32. app/api/schemas.py +112 -0
  33. app/config.py +52 -0
  34. app/main.py +64 -0
  35. app/reranking/__init__.py +1 -0
  36. app/reranking/__pycache__/__init__.cpython-311.pyc +0 -0
  37. app/reranking/__pycache__/authority_scorer.cpython-311.pyc +0 -0
  38. app/reranking/__pycache__/pipeline.cpython-311.pyc +0 -0
  39. app/reranking/authority_scorer.py +134 -0
  40. app/reranking/pipeline.py +99 -0
  41. app/sources/__init__.py +1 -0
  42. app/sources/__pycache__/__init__.cpython-311.pyc +0 -0
  43. app/sources/__pycache__/duckduckgo.cpython-311.pyc +0 -0
  44. app/sources/__pycache__/tavily.cpython-311.pyc +0 -0
  45. app/sources/duckduckgo.py +103 -0
  46. app/sources/tavily.py +106 -0
  47. app/temporal/__init__.py +1 -0
  48. app/temporal/__pycache__/__init__.cpython-311.pyc +0 -0
  49. app/temporal/__pycache__/freshness_scorer.cpython-311.pyc +0 -0
  50. app/temporal/__pycache__/intent_detector.cpython-311.pyc +0 -0
.env.example ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # LLM Providers (choose one or both)
2
+ GROQ_API_KEY=gsk_your_groq_key
3
+ OPENROUTER_API_KEY=sk-or-your_openrouter_key
4
+
5
+ # Search Sources
6
+ TAVILY_API_KEY=tvly-your_tavily_key
7
+ SERPER_API_KEY=your_serper_key # Optional
8
+
9
+ # Configuration
10
+ LLM_PROVIDER=groq # or "openrouter"
11
+ LLM_MODEL=llama-3.3-70b-versatile
.gitattributes CHANGED
@@ -33,3 +33,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ .venv/bin/python.exe filter=lfs diff=lfs merge=lfs -text
37
+ .venv/bin/python3.11.exe filter=lfs diff=lfs merge=lfs -text
38
+ .venv/bin/python3.exe filter=lfs diff=lfs merge=lfs -text
39
+ .venv/bin/python3w.exe filter=lfs diff=lfs merge=lfs -text
40
+ .venv/bin/pythonw.exe filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # HuggingFace Spaces files
2
+ *.hf
3
+ .hf
4
+
5
+ # Python
6
+ __pycache__/
7
+ *.py[cod]
8
+ *$py.class
9
+ *.so
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ *.egg-info/
24
+ .installed.cfg
25
+ *.egg
26
+
27
+ # Virtual environments
28
+ .env
29
+ .venv/
30
+ venv/
31
+ ENV/
32
+
33
+ # IDE
34
+ .idea/
35
+ .vscode/
36
+ *.swp
37
+ *.swo
38
+
39
+ # OS
40
+ .DS_Store
41
+ Thumbs.db
42
+
43
+ # Local development
44
+ *.log
45
+ .cache/
.venv/.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ *
.venv/bin/Activate.ps1 ADDED
@@ -0,0 +1,247 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <#
2
+ .Synopsis
3
+ Activate a Python virtual environment for the current PowerShell session.
4
+
5
+ .Description
6
+ Pushes the python executable for a virtual environment to the front of the
7
+ $Env:PATH environment variable and sets the prompt to signify that you are
8
+ in a Python virtual environment. Makes use of the command line switches as
9
+ well as the `pyvenv.cfg` file values present in the virtual environment.
10
+
11
+ .Parameter VenvDir
12
+ Path to the directory that contains the virtual environment to activate. The
13
+ default value for this is the parent of the directory that the Activate.ps1
14
+ script is located within.
15
+
16
+ .Parameter Prompt
17
+ The prompt prefix to display when this virtual environment is activated. By
18
+ default, this prompt is the name of the virtual environment folder (VenvDir)
19
+ surrounded by parentheses and followed by a single space (ie. '(.venv) ').
20
+
21
+ .Example
22
+ Activate.ps1
23
+ Activates the Python virtual environment that contains the Activate.ps1 script.
24
+
25
+ .Example
26
+ Activate.ps1 -Verbose
27
+ Activates the Python virtual environment that contains the Activate.ps1 script,
28
+ and shows extra information about the activation as it executes.
29
+
30
+ .Example
31
+ Activate.ps1 -VenvDir C:\Users\MyUser\Common\.venv
32
+ Activates the Python virtual environment located in the specified location.
33
+
34
+ .Example
35
+ Activate.ps1 -Prompt "MyPython"
36
+ Activates the Python virtual environment that contains the Activate.ps1 script,
37
+ and prefixes the current prompt with the specified string (surrounded in
38
+ parentheses) while the virtual environment is active.
39
+
40
+ .Notes
41
+ On Windows, it may be required to enable this Activate.ps1 script by setting the
42
+ execution policy for the user. You can do this by issuing the following PowerShell
43
+ command:
44
+
45
+ PS C:\> Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope CurrentUser
46
+
47
+ For more information on Execution Policies:
48
+ https://go.microsoft.com/fwlink/?LinkID=135170
49
+
50
+ #>
51
+ Param(
52
+ [Parameter(Mandatory = $false)]
53
+ [String]
54
+ $VenvDir,
55
+ [Parameter(Mandatory = $false)]
56
+ [String]
57
+ $Prompt
58
+ )
59
+
60
+ <# Function declarations --------------------------------------------------- #>
61
+
62
+ <#
63
+ .Synopsis
64
+ Remove all shell session elements added by the Activate script, including the
65
+ addition of the virtual environment's Python executable from the beginning of
66
+ the PATH variable.
67
+
68
+ .Parameter NonDestructive
69
+ If present, do not remove this function from the global namespace for the
70
+ session.
71
+
72
+ #>
73
+ function global:deactivate ([switch]$NonDestructive) {
74
+ # Revert to original values
75
+
76
+ # The prior prompt:
77
+ if (Test-Path -Path Function:_OLD_VIRTUAL_PROMPT) {
78
+ Copy-Item -Path Function:_OLD_VIRTUAL_PROMPT -Destination Function:prompt
79
+ Remove-Item -Path Function:_OLD_VIRTUAL_PROMPT
80
+ }
81
+
82
+ # The prior PYTHONHOME:
83
+ if (Test-Path -Path Env:_OLD_VIRTUAL_PYTHONHOME) {
84
+ Copy-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME -Destination Env:PYTHONHOME
85
+ Remove-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME
86
+ }
87
+
88
+ # The prior PATH:
89
+ if (Test-Path -Path Env:_OLD_VIRTUAL_PATH) {
90
+ Copy-Item -Path Env:_OLD_VIRTUAL_PATH -Destination Env:PATH
91
+ Remove-Item -Path Env:_OLD_VIRTUAL_PATH
92
+ }
93
+
94
+ # Just remove the VIRTUAL_ENV altogether:
95
+ if (Test-Path -Path Env:VIRTUAL_ENV) {
96
+ Remove-Item -Path env:VIRTUAL_ENV
97
+ }
98
+
99
+ # Just remove VIRTUAL_ENV_PROMPT altogether.
100
+ if (Test-Path -Path Env:VIRTUAL_ENV_PROMPT) {
101
+ Remove-Item -Path env:VIRTUAL_ENV_PROMPT
102
+ }
103
+
104
+ # Just remove the _PYTHON_VENV_PROMPT_PREFIX altogether:
105
+ if (Get-Variable -Name "_PYTHON_VENV_PROMPT_PREFIX" -ErrorAction SilentlyContinue) {
106
+ Remove-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Scope Global -Force
107
+ }
108
+
109
+ # Leave deactivate function in the global namespace if requested:
110
+ if (-not $NonDestructive) {
111
+ Remove-Item -Path function:deactivate
112
+ }
113
+ }
114
+
115
+ <#
116
+ .Description
117
+ Get-PyVenvConfig parses the values from the pyvenv.cfg file located in the
118
+ given folder, and returns them in a map.
119
+
120
+ For each line in the pyvenv.cfg file, if that line can be parsed into exactly
121
+ two strings separated by `=` (with any amount of whitespace surrounding the =)
122
+ then it is considered a `key = value` line. The left hand string is the key,
123
+ the right hand is the value.
124
+
125
+ If the value starts with a `'` or a `"` then the first and last character is
126
+ stripped from the value before being captured.
127
+
128
+ .Parameter ConfigDir
129
+ Path to the directory that contains the `pyvenv.cfg` file.
130
+ #>
131
+ function Get-PyVenvConfig(
132
+ [String]
133
+ $ConfigDir
134
+ ) {
135
+ Write-Verbose "Given ConfigDir=$ConfigDir, obtain values in pyvenv.cfg"
136
+
137
+ # Ensure the file exists, and issue a warning if it doesn't (but still allow the function to continue).
138
+ $pyvenvConfigPath = Join-Path -Resolve -Path $ConfigDir -ChildPath 'pyvenv.cfg' -ErrorAction Continue
139
+
140
+ # An empty map will be returned if no config file is found.
141
+ $pyvenvConfig = @{ }
142
+
143
+ if ($pyvenvConfigPath) {
144
+
145
+ Write-Verbose "File exists, parse `key = value` lines"
146
+ $pyvenvConfigContent = Get-Content -Path $pyvenvConfigPath
147
+
148
+ $pyvenvConfigContent | ForEach-Object {
149
+ $keyval = $PSItem -split "\s*=\s*", 2
150
+ if ($keyval[0] -and $keyval[1]) {
151
+ $val = $keyval[1]
152
+
153
+ # Remove extraneous quotations around a string value.
154
+ if ("'""".Contains($val.Substring(0, 1))) {
155
+ $val = $val.Substring(1, $val.Length - 2)
156
+ }
157
+
158
+ $pyvenvConfig[$keyval[0]] = $val
159
+ Write-Verbose "Adding Key: '$($keyval[0])'='$val'"
160
+ }
161
+ }
162
+ }
163
+ return $pyvenvConfig
164
+ }
165
+
166
+
167
+ <# Begin Activate script --------------------------------------------------- #>
168
+
169
+ # Determine the containing directory of this script
170
+ $VenvExecPath = Split-Path -Parent $MyInvocation.MyCommand.Definition
171
+ $VenvExecDir = Get-Item -Path $VenvExecPath
172
+
173
+ Write-Verbose "Activation script is located in path: '$VenvExecPath'"
174
+ Write-Verbose "VenvExecDir Fullname: '$($VenvExecDir.FullName)"
175
+ Write-Verbose "VenvExecDir Name: '$($VenvExecDir.Name)"
176
+
177
+ # Set values required in priority: CmdLine, ConfigFile, Default
178
+ # First, get the location of the virtual environment, it might not be
179
+ # VenvExecDir if specified on the command line.
180
+ if ($VenvDir) {
181
+ Write-Verbose "VenvDir given as parameter, using '$VenvDir' to determine values"
182
+ }
183
+ else {
184
+ Write-Verbose "VenvDir not given as a parameter, using parent directory name as VenvDir."
185
+ $VenvDir = $VenvExecDir.Parent.FullName.TrimEnd("\\/")
186
+ Write-Verbose "VenvDir=$VenvDir"
187
+ }
188
+
189
+ # Next, read the `pyvenv.cfg` file to determine any required value such
190
+ # as `prompt`.
191
+ $pyvenvCfg = Get-PyVenvConfig -ConfigDir $VenvDir
192
+
193
+ # Next, set the prompt from the command line, or the config file, or
194
+ # just use the name of the virtual environment folder.
195
+ if ($Prompt) {
196
+ Write-Verbose "Prompt specified as argument, using '$Prompt'"
197
+ }
198
+ else {
199
+ Write-Verbose "Prompt not specified as argument to script, checking pyvenv.cfg value"
200
+ if ($pyvenvCfg -and $pyvenvCfg['prompt']) {
201
+ Write-Verbose " Setting based on value in pyvenv.cfg='$($pyvenvCfg['prompt'])'"
202
+ $Prompt = $pyvenvCfg['prompt'];
203
+ }
204
+ else {
205
+ Write-Verbose " Setting prompt based on parent's directory's name. (Is the directory name passed to venv module when creating the virtual environment)"
206
+ Write-Verbose " Got leaf-name of $VenvDir='$(Split-Path -Path $venvDir -Leaf)'"
207
+ $Prompt = Split-Path -Path $venvDir -Leaf
208
+ }
209
+ }
210
+
211
+ Write-Verbose "Prompt = '$Prompt'"
212
+ Write-Verbose "VenvDir='$VenvDir'"
213
+
214
+ # Deactivate any currently active virtual environment, but leave the
215
+ # deactivate function in place.
216
+ deactivate -nondestructive
217
+
218
+ # Now set the environment variable VIRTUAL_ENV, used by many tools to determine
219
+ # that there is an activated venv.
220
+ $env:VIRTUAL_ENV = $VenvDir
221
+
222
+ if (-not $Env:VIRTUAL_ENV_DISABLE_PROMPT) {
223
+
224
+ Write-Verbose "Setting prompt to '$Prompt'"
225
+
226
+ # Set the prompt to include the env name
227
+ # Make sure _OLD_VIRTUAL_PROMPT is global
228
+ function global:_OLD_VIRTUAL_PROMPT { "" }
229
+ Copy-Item -Path function:prompt -Destination function:_OLD_VIRTUAL_PROMPT
230
+ New-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Description "Python virtual environment prompt prefix" -Scope Global -Option ReadOnly -Visibility Public -Value $Prompt
231
+
232
+ function global:prompt {
233
+ Write-Host -NoNewline -ForegroundColor Green "($_PYTHON_VENV_PROMPT_PREFIX) "
234
+ _OLD_VIRTUAL_PROMPT
235
+ }
236
+ $env:VIRTUAL_ENV_PROMPT = $Prompt
237
+ }
238
+
239
+ # Clear PYTHONHOME
240
+ if (Test-Path -Path Env:PYTHONHOME) {
241
+ Copy-Item -Path Env:PYTHONHOME -Destination Env:_OLD_VIRTUAL_PYTHONHOME
242
+ Remove-Item -Path Env:PYTHONHOME
243
+ }
244
+
245
+ # Add the venv to the PATH
246
+ Copy-Item -Path Env:PATH -Destination Env:_OLD_VIRTUAL_PATH
247
+ $Env:PATH = "$VenvExecDir$([System.IO.Path]::PathSeparator)$Env:PATH"
.venv/bin/activate ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This file must be used with "source bin/activate" *from bash*
2
+ # you cannot run it directly
3
+
4
+ deactivate () {
5
+ # reset old environment variables
6
+ if [ -n "${_OLD_VIRTUAL_PATH:-}" ] ; then
7
+ PATH="${_OLD_VIRTUAL_PATH:-}"
8
+ export PATH
9
+ unset _OLD_VIRTUAL_PATH
10
+ fi
11
+ if [ -n "${_OLD_VIRTUAL_PYTHONHOME:-}" ] ; then
12
+ PYTHONHOME="${_OLD_VIRTUAL_PYTHONHOME:-}"
13
+ export PYTHONHOME
14
+ unset _OLD_VIRTUAL_PYTHONHOME
15
+ fi
16
+
17
+ # Call hash to forget past commands. Without forgetting
18
+ # past commands the $PATH changes we made may not be respected
19
+ hash -r 2> /dev/null
20
+
21
+ if [ -n "${_OLD_VIRTUAL_PS1:-}" ] ; then
22
+ PS1="${_OLD_VIRTUAL_PS1:-}"
23
+ export PS1
24
+ unset _OLD_VIRTUAL_PS1
25
+ fi
26
+
27
+ unset VIRTUAL_ENV
28
+ unset VIRTUAL_ENV_PROMPT
29
+ if [ ! "${1:-}" = "nondestructive" ] ; then
30
+ # Self destruct!
31
+ unset -f deactivate
32
+ fi
33
+ }
34
+
35
+ # unset irrelevant variables
36
+ deactivate nondestructive
37
+
38
+ VIRTUAL_ENV=$(cygpath "C:\Users\gabri\Lancer\.venv")
39
+ export VIRTUAL_ENV
40
+
41
+ _OLD_VIRTUAL_PATH="$PATH"
42
+ PATH="$VIRTUAL_ENV/bin:$PATH"
43
+ export PATH
44
+
45
+ # unset PYTHONHOME if set
46
+ # this will fail if PYTHONHOME is set to the empty string (which is bad anyway)
47
+ # could use `if (set -u; : $PYTHONHOME) ;` in bash
48
+ if [ -n "${PYTHONHOME:-}" ] ; then
49
+ _OLD_VIRTUAL_PYTHONHOME="${PYTHONHOME:-}"
50
+ unset PYTHONHOME
51
+ fi
52
+
53
+ if [ -z "${VIRTUAL_ENV_DISABLE_PROMPT:-}" ] ; then
54
+ _OLD_VIRTUAL_PS1="${PS1:-}"
55
+ PS1="(.venv) ${PS1:-}"
56
+ export PS1
57
+ VIRTUAL_ENV_PROMPT="(.venv) "
58
+ export VIRTUAL_ENV_PROMPT
59
+ fi
60
+
61
+ # Call hash to forget past commands. Without forgetting
62
+ # past commands the $PATH changes we made may not be respected
63
+ hash -r 2> /dev/null
.venv/bin/python.exe ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9af09b8342333dd7ac86931f8542366d4cd8e733993e8442d7abe025dcffbfce
3
+ size 138549
.venv/bin/python3.11.exe ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9af09b8342333dd7ac86931f8542366d4cd8e733993e8442d7abe025dcffbfce
3
+ size 138549
.venv/bin/python3.exe ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9af09b8342333dd7ac86931f8542366d4cd8e733993e8442d7abe025dcffbfce
3
+ size 138549
.venv/bin/python3w.exe ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:852435742fd9f70e20c4f9f9c0472f79247543bd88d72f16a70410e0a8a7b1d7
3
+ size 112963
.venv/bin/pythonw.exe ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:852435742fd9f70e20c4f9f9c0472f79247543bd88d72f16a70410e0a8a7b1d7
3
+ size 112963
.venv/pyvenv.cfg ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ home = C:\Program Files\Inkscape\bin
2
+ include-system-site-packages = false
3
+ version = 3.11.10
4
+ executable = C:\Program Files\Inkscape\bin\python.exe
5
+ command = C:\Program Files\Inkscape\bin\python.exe -m venv --without-pip C:\Users\gabri\Lancer\.venv
Dockerfile ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.11-slim

WORKDIR /app

# Install system dependencies for torch
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    && rm -rf /var/lib/apt/lists/*

# Copy project metadata first (best layer-cache hit rate for dependencies)
COPY pyproject.toml .

# Copy the application source BEFORE installing: the original ran
# `pip install -e .` with only pyproject.toml present, so the editable
# install was built against a tree with no package source in it.
COPY app/ ./app/

RUN pip install --no-cache-dir -e .

# HuggingFace Spaces uses port 7860
EXPOSE 7860

# Run with uvicorn
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,11 +1,54 @@
1
- ---
2
- title: Lancer
3
- emoji: 👀
4
- colorFrom: purple
5
- colorTo: pink
6
- sdk: docker
7
- pinned: false
8
- license: mit
9
- ---
10
-
11
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Lancer Search API
3
+ emoji: 🔍
4
+ colorFrom: purple
5
+ colorTo: blue
6
+ sdk: docker
7
+ pinned: false
8
+ ---
9
+
10
+ # Lancer Search API
11
+
12
+ 🔍 Advanced AI-powered search API with temporal intelligence.
13
+
14
+ ## Features
15
+
16
+ - **Temporal Intelligence**: Understands when you need fresh vs historical info
17
+ - **Multi-Stage Reranking**: Freshness + Authority scoring
18
+ - **Multi-Source Search**: Tavily, DuckDuckGo
19
+ - **LLM Synthesis**: Groq or OpenRouter
20
+
21
+ ## API Endpoints
22
+
23
+ ```bash
24
+ # Search with synthesis
25
+ POST /api/v1/search
26
+ {
27
+ "query": "What is the latest GPT model?",
28
+ "max_results": 10,
29
+ "freshness": "week"
30
+ }
31
+
32
+ # Health check
33
+ GET /health
34
+ ```
35
+
36
+ ## Environment Variables
37
+
38
+ Configure these in HuggingFace Space Secrets:
39
+
40
+ | Variable | Required | Description |
41
+ |----------|----------|-------------|
42
+ | `GROQ_API_KEY` | Yes* | Groq API key |
43
+ | `OPENROUTER_API_KEY` | Yes* | OpenRouter API key |
44
+ | `TAVILY_API_KEY` | Yes | Tavily search API key |
45
+ | `LLM_PROVIDER` | No | "groq" or "openrouter" |
46
+
47
+ *At least one LLM provider key required
48
+
49
+ ## Local Development
50
+
51
+ ```bash
52
+ pip install -e .
53
+ uvicorn app.main:app --reload
54
+ ```
app/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ """Lancer - Advanced AI Search API"""
2
+
3
+ __version__ = "0.1.0"
app/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (218 Bytes). View file
 
app/__pycache__/config.cpython-311.pyc ADDED
Binary file (2.28 kB). View file
 
app/__pycache__/main.cpython-311.pyc ADDED
Binary file (2.5 kB). View file
 
app/agents/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Agents module."""
app/agents/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (181 Bytes). View file
 
app/agents/__pycache__/llm_client.cpython-311.pyc ADDED
Binary file (4.42 kB). View file
 
app/agents/__pycache__/synthesizer.cpython-311.pyc ADDED
Binary file (4.98 kB). View file
 
app/agents/llm_client.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """LLM client abstraction for multiple providers.
2
+
3
+ Supports Groq and OpenRouter for LLM inference.
4
+ """
5
+
6
+ import httpx
7
+ from typing import Optional
8
+
9
+ from app.config import get_settings
10
+
11
+
12
async def generate_completion(
    messages: list[dict],
    model: Optional[str] = None,
    temperature: float = 0.3,
    max_tokens: int = 2048,
) -> str:
    """
    Generate a completion using the configured LLM provider.

    Looks up the provider configured in settings and dispatches to the
    matching provider-specific caller.

    Args:
        messages: List of message dicts with 'role' and 'content'
        model: Model override (uses settings default if None)
        temperature: Sampling temperature
        max_tokens: Maximum tokens to generate

    Returns:
        Generated text content

    Raises:
        ValueError: if the configured provider name is not recognised.
    """
    settings = get_settings()
    provider = settings.llm_provider
    chosen_model = model or settings.llm_model

    # Dispatch table rather than an if/elif ladder; both callers share the
    # same positional signature (messages, model, temperature, max_tokens).
    handlers = {
        "groq": _call_groq,
        "openrouter": _call_openrouter,
    }
    handler = handlers.get(provider)
    if handler is None:
        raise ValueError(f"Unknown LLM provider: {provider}")

    return await handler(messages, chosen_model, temperature, max_tokens)
40
+
41
+
42
async def _post_chat_completion(
    url: str,
    headers: dict,
    model: str,
    messages: list[dict],
    temperature: float,
    max_tokens: int,
) -> str:
    """POST an OpenAI-compatible chat-completion request and return the reply text.

    Shared transport for all providers: Groq and OpenRouter both expose the
    OpenAI chat-completions wire format, so only the URL and headers differ.

    Raises:
        httpx.HTTPStatusError: on any non-2xx response (via raise_for_status).
    """
    async with httpx.AsyncClient(timeout=60.0) as client:
        response = await client.post(
            url,
            headers=headers,
            json={
                "model": model,
                "messages": messages,
                "temperature": temperature,
                "max_tokens": max_tokens,
            },
        )
        response.raise_for_status()
        data = response.json()

    return data["choices"][0]["message"]["content"]


async def _call_groq(
    messages: list[dict],
    model: str,
    temperature: float,
    max_tokens: int,
) -> str:
    """Call Groq API.

    Raises:
        ValueError: if GROQ_API_KEY is not configured.
    """
    settings = get_settings()

    if not settings.groq_api_key:
        raise ValueError("GROQ_API_KEY not configured")

    return await _post_chat_completion(
        "https://api.groq.com/openai/v1/chat/completions",
        {
            "Authorization": f"Bearer {settings.groq_api_key}",
            "Content-Type": "application/json",
        },
        model,
        messages,
        temperature,
        max_tokens,
    )


async def _call_openrouter(
    messages: list[dict],
    model: str,
    temperature: float,
    max_tokens: int,
) -> str:
    """Call OpenRouter API.

    Raises:
        ValueError: if OPENROUTER_API_KEY is not configured.
    """
    settings = get_settings()

    if not settings.openrouter_api_key:
        raise ValueError("OPENROUTER_API_KEY not configured")

    return await _post_chat_completion(
        "https://openrouter.ai/api/v1/chat/completions",
        {
            "Authorization": f"Bearer {settings.openrouter_api_key}",
            "Content-Type": "application/json",
            # OpenRouter-specific attribution headers.
            "HTTP-Referer": "https://lancer-api.hf.space",
            "X-Title": "Lancer Search API",
        },
        model,
        messages,
        temperature,
        max_tokens,
    )
app/agents/synthesizer.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Answer synthesizer agent.
2
+
3
+ Generates a coherent answer from search results with citations.
4
+ """
5
+
6
+ from datetime import datetime
7
+ from typing import Optional
8
+
9
+ from app.api.schemas import SearchResult, TemporalContext, Citation
10
+ from app.agents.llm_client import generate_completion
11
+
12
+
13
+ SYNTHESIS_PROMPT = """You are a research assistant that synthesizes information from search results.
14
+
15
+ CURRENT DATE: {current_date}
16
+
17
+ USER QUERY: {query}
18
+
19
+ TEMPORAL CONTEXT:
20
+ - Query intent: {temporal_intent} (the user {intent_explanation})
21
+ - Temporal urgency: {temporal_urgency:.0%} (how important freshness is)
22
+
23
+ SEARCH RESULTS:
24
+ {formatted_results}
25
+
26
+ INSTRUCTIONS:
27
+ 1. Synthesize a comprehensive answer based on the search results
28
+ 2. ALWAYS cite your sources using [1], [2], etc. format
29
+ 3. If the query requires current information, prioritize the most recent results
30
+ 4. If there are conflicting dates or versions mentioned, use the most recent accurate information
31
+ 5. Be concise but thorough
32
+ 6. If information seems outdated compared to current date ({current_date}), note this
33
+ 7. Write in the same language as the query
34
+
35
+ Generate your answer:"""
36
+
37
+
38
async def synthesize_answer(
    query: str,
    results: list[SearchResult],
    temporal_context: Optional[TemporalContext] = None,
) -> tuple[str, list[Citation]]:
    """
    Synthesize an answer from search results.

    Builds the synthesis prompt from the top 10 results plus the temporal
    context, asks the configured LLM for an answer, and pairs it with a
    citation list matching the [n] markers in the prompt.

    Args:
        query: Original search query
        results: List of search results to synthesize from
        temporal_context: Temporal analysis context

    Returns:
        Tuple of (answer_text, citations_list)
    """
    # Guard clause: nothing to synthesize from.
    if not results:
        return "No results found to synthesize an answer.", []

    # Only the top 10 results feed both the prompt and the citation list,
    # so citation indices always line up with the prompt's [n] markers.
    top_hits = results[:10]

    # Temporal defaults, overridden by the supplied context when present.
    today = datetime.now().strftime("%Y-%m-%d")
    intent = "neutral"
    urgency = 0.5
    if temporal_context:
        intent = temporal_context.query_temporal_intent
        urgency = temporal_context.temporal_urgency
        today = temporal_context.current_date

    # Human-readable gloss of the detected intent, interpolated into the prompt.
    explanations = {
        "current": "is looking for the most recent/current information",
        "historical": "is interested in historical or background information",
        "neutral": "has no specific temporal preference",
    }

    prompt = SYNTHESIS_PROMPT.format(
        current_date=today,
        query=query,
        temporal_intent=intent,
        intent_explanation=explanations.get(intent, ""),
        temporal_urgency=urgency,
        formatted_results=format_results_for_prompt(top_hits),
    )

    chat = [
        {"role": "system", "content": "You are a helpful research assistant."},
        {"role": "user", "content": prompt},
    ]

    try:
        answer = await generate_completion(chat, temperature=0.3)
    except Exception as e:
        # Best-effort fallback: surface the failure in the answer text
        # instead of failing the whole request.
        answer = f"Error generating synthesis: {e}. Please review the search results directly."

    citations = [
        Citation(index=pos, url=hit.url, title=hit.title)
        for pos, hit in enumerate(top_hits, 1)
    ]

    return answer, citations
109
+
110
+
111
def format_results_for_prompt(results: list[SearchResult]) -> str:
    """Format search results for inclusion in the LLM prompt.

    Each result becomes a numbered entry ([1], [2], ...) with title,
    optional publication date, URL, scoring metadata, and a content
    snippet truncated to 500 characters; entries are blank-line separated.
    """
    entries = []

    for rank, hit in enumerate(results, 1):
        published = (
            f" (Published: {hit.published_date.strftime('%Y-%m-%d')})"
            if hit.published_date
            else ""
        )

        entries.append(
            f"[{rank}] {hit.title}{published}\n"
            f"   URL: {hit.url}\n"
            f"   Freshness: {hit.freshness_score:.0%} | Authority: {hit.authority_score:.0%}\n"
            f"   Content: {hit.content[:500]}..."
        )

    return "\n\n".join(entries)
app/api/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """API routes package."""
app/api/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (183 Bytes). View file
 
app/api/__pycache__/schemas.cpython-311.pyc ADDED
Binary file (5.92 kB). View file
 
app/api/routes/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """API routes package."""
app/api/routes/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (190 Bytes). View file
 
app/api/routes/__pycache__/search.cpython-311.pyc ADDED
Binary file (5.54 kB). View file
 
app/api/routes/search.py ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Search API routes."""
2
+
3
+ import time
4
+ from datetime import datetime
5
+
6
+ from fastapi import APIRouter, HTTPException
7
+
8
+ from app.api.schemas import (
9
+ SearchRequest,
10
+ SearchResponse,
11
+ SearchResult,
12
+ TemporalContext,
13
+ Citation,
14
+ ErrorResponse,
15
+ )
16
+ from app.config import get_settings
17
+ from app.temporal.intent_detector import detect_temporal_intent
18
+ from app.temporal.freshness_scorer import calculate_freshness_score
19
+ from app.sources.tavily import search_tavily
20
+ from app.sources.duckduckgo import search_duckduckgo
21
+ from app.reranking.pipeline import rerank_results
22
+ from app.agents.synthesizer import synthesize_answer
23
+
24
+ router = APIRouter()
25
+
26
+
27
@router.post(
    "/search",
    response_model=SearchResponse,
    responses={500: {"model": ErrorResponse}},
    summary="Search with AI synthesis",
    description="Perform a search with temporal intelligence and return an AI-synthesized answer.",
)
async def search(request: SearchRequest) -> SearchResponse:
    """
    Perform an intelligent search with:
    - Temporal intent detection
    - Multi-source search
    - Multi-stage reranking
    - AI-powered answer synthesis

    Raises:
        HTTPException: 500 wrapping any unexpected internal error.
    """
    start_time = time.perf_counter()
    settings = get_settings()

    try:
        # Step 1: Analyze temporal intent
        temporal_intent, temporal_urgency = detect_temporal_intent(request.query)

        temporal_context = TemporalContext(
            query_temporal_intent=temporal_intent,
            temporal_urgency=temporal_urgency,
            current_date=datetime.now().strftime("%Y-%m-%d"),
        )

        # Step 2: Search multiple sources
        raw_results = []

        # Try Tavily first (best quality)
        if settings.tavily_api_key:
            tavily_results = await search_tavily(
                query=request.query,
                max_results=settings.max_search_results,
                freshness=request.freshness,
                include_domains=request.include_domains,
                exclude_domains=request.exclude_domains,
            )
            raw_results.extend(tavily_results)

        # Fallback to DuckDuckGo if needed
        if not raw_results:
            ddg_results = await search_duckduckgo(
                query=request.query,
                max_results=settings.max_search_results,
            )
            raw_results.extend(ddg_results)

        # No source returned anything: respond with an empty (but valid) payload.
        if not raw_results:
            return SearchResponse(
                query=request.query,
                answer="No results found for your query.",
                results=[],
                citations=[],
                temporal_context=temporal_context,
                processing_time_ms=(time.perf_counter() - start_time) * 1000,
            )

        # Step 3: Apply multi-stage reranking
        ranked_results = await rerank_results(
            query=request.query,
            results=raw_results,
            temporal_urgency=temporal_urgency,
            max_results=request.max_results,
        )

        # Step 4: Convert to SearchResult models
        # (plain iteration — the enumerate index was never used)
        search_results = []
        for result in ranked_results:
            freshness = calculate_freshness_score(result.get("published_date"))
            search_results.append(
                SearchResult(
                    title=result.get("title", ""),
                    url=result.get("url", ""),
                    content=result.get("content", ""),
                    score=result.get("score", 0.5),
                    published_date=result.get("published_date"),
                    freshness_score=freshness,
                    authority_score=result.get("authority_score", 0.5),
                )
            )

        # Step 5: Synthesize answer (if requested)
        answer = None
        citations = []

        if request.include_answer and search_results:
            answer, citations = await synthesize_answer(
                query=request.query,
                results=search_results,
                temporal_context=temporal_context,
            )

        processing_time = (time.perf_counter() - start_time) * 1000

        return SearchResponse(
            query=request.query,
            answer=answer,
            results=search_results,
            citations=citations,
            temporal_context=temporal_context,
            processing_time_ms=processing_time,
        )

    except HTTPException:
        # Don't re-wrap deliberate HTTP errors as opaque 500s.
        raise
    except Exception as e:
        # Chain the original exception so the real traceback is preserved
        # for logging/debugging instead of being swallowed by the re-raise.
        raise HTTPException(status_code=500, detail=str(e)) from e
+
136
+
137
@router.post(
    "/search/raw",
    response_model=SearchResponse,
    summary="Search without synthesis",
    description="Perform a search and return raw results without AI synthesis (faster).",
)
async def search_raw(request: SearchRequest) -> SearchResponse:
    """Fast search without answer synthesis.

    Delegates to `search` after forcing `include_answer` off, so source
    selection, reranking, and temporal handling stay identical; only the
    LLM synthesis step is skipped.
    """
    # NOTE(review): mutates the incoming request model in place before
    # delegating — fine for a per-request FastAPI body, but worth knowing.
    request.include_answer = False
    return await search(request)
app/api/schemas.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Pydantic schemas for API request/response models."""
2
+
3
+ from datetime import datetime
4
+ from typing import Literal
5
+
6
+ from pydantic import BaseModel, Field
7
+
8
+
9
+ # === Request Models ===
10
+
11
class SearchRequest(BaseModel):
    """Search request payload.

    Declarative contract for the search endpoints; the ``Field`` metadata
    below feeds the generated OpenAPI schema and is part of the public API.
    """

    # Free-text query; length bounds keep downstream search/LLM calls sane.
    query: str = Field(..., min_length=1, max_length=1000, description="Search query")
    # Caps the final (post-rerank) result count.
    max_results: int = Field(default=10, ge=1, le=50, description="Maximum results to return")
    freshness: Literal["day", "week", "month", "year", "any"] = Field(
        default="any",
        description="Filter results by recency"
    )
    # Optional allow/deny domain lists, forwarded to the search backend.
    include_domains: list[str] | None = Field(
        default=None,
        description="Only include results from these domains"
    )
    exclude_domains: list[str] | None = Field(
        default=None,
        description="Exclude results from these domains"
    )
    # When False the LLM synthesis step is skipped entirely (faster).
    include_answer: bool = Field(
        default=True,
        description="Include AI-generated answer"
    )
32
+
33
+
34
+ # === Response Models ===
35
+
36
class Citation(BaseModel):
    """Citation reference for the answer.

    Links a numeric marker in the synthesized answer text back to the
    source result it was drawn from.
    """

    # 1-based so it lines up with [1]-style markers in the answer text.
    index: int = Field(..., description="Citation index (1-based)")
    url: str = Field(..., description="Source URL")
    title: str = Field(..., description="Source title")
42
+
43
+
44
class TemporalContext(BaseModel):
    """Temporal metadata about the search."""

    # Whether the query asks about "now", the past, or neither.
    query_temporal_intent: Literal["current", "historical", "neutral"] = Field(
        ...,
        description="Detected temporal intent of the query"
    )
    # Drives how strongly freshness is weighted during reranking.
    temporal_urgency: float = Field(
        ...,
        ge=0.0,
        le=1.0,
        description="How important freshness is for this query (0-1)"
    )
    # Echoed back so clients can interpret relative dates in the answer.
    current_date: str = Field(..., description="Current date for context")
58
+
59
+
60
class SearchResult(BaseModel):
    """Individual search result."""

    title: str = Field(..., description="Result title")
    url: str = Field(..., description="Result URL")
    content: str = Field(..., description="Result content/snippet")
    # Combined relevance after reranking (freshness/authority blended in).
    score: float = Field(..., ge=0.0, le=1.0, description="Overall relevance score")
    published_date: datetime | None = Field(
        default=None,
        description="Publication date if available"
    )
    # 0.5 is the neutral default used when no publication date is known.
    freshness_score: float = Field(
        default=0.5,
        ge=0.0,
        le=1.0,
        description="How fresh/recent the content is"
    )
    # 0.5 is the neutral default for unrecognized domains.
    authority_score: float = Field(
        default=0.5,
        ge=0.0,
        le=1.0,
        description="Domain authority/trust score"
    )
83
+
84
+
85
class SearchResponse(BaseModel):
    """Complete search response."""

    query: str = Field(..., description="Original query")
    # None when the caller disabled synthesis or no results were found.
    answer: str | None = Field(
        default=None,
        description="AI-generated answer synthesized from results"
    )
    results: list[SearchResult] = Field(
        default_factory=list,
        description="Ranked search results"
    )
    # Empty when no answer was synthesized.
    citations: list[Citation] = Field(
        default_factory=list,
        description="Citations referenced in the answer"
    )
    temporal_context: TemporalContext | None = Field(
        default=None,
        description="Temporal analysis metadata"
    )
    # Wall-clock time of the whole request pipeline, in milliseconds.
    processing_time_ms: float = Field(..., description="Total processing time in milliseconds")
106
+
107
+
108
class ErrorResponse(BaseModel):
    """Error response model.

    NOTE(review): not referenced by the routes visible in this commit
    (errors are raised via HTTPException); confirm it is wired into the
    route ``responses=`` documentation somewhere, or it is dead weight.
    """

    error: str = Field(..., description="Error message")
    detail: str | None = Field(default=None, description="Detailed error information")
app/config.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Application configuration using pydantic-settings."""
2
+
3
+ from functools import lru_cache
4
+ from typing import Literal
5
+
6
+ from pydantic_settings import BaseSettings, SettingsConfigDict
7
+
8
+
9
class Settings(BaseSettings):
    """Application settings loaded from environment variables.

    Values come from the process environment and an optional ``.env`` file;
    unrecognized variables are silently ignored (``extra="ignore"``).
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        extra="ignore",
    )

    # API Keys - Search Sources
    # Empty string (not None) means "not configured"; callers test truthiness.
    tavily_api_key: str = ""
    serper_api_key: str | None = None

    # API Keys - LLM Providers
    groq_api_key: str | None = None
    openrouter_api_key: str | None = None

    # LLM Configuration
    llm_provider: Literal["groq", "openrouter"] = "groq"
    llm_model: str = "llama-3.3-70b-versatile"

    # Reranking Models (the MVP pipeline currently skips the encoder stages)
    bi_encoder_model: str = "BAAI/bge-small-en-v1.5"
    cross_encoder_model: str = "BAAI/bge-reranker-base"

    # Temporal Settings
    default_freshness_half_life: int = 30  # days

    # API Settings
    max_search_results: int = 20  # raw results fetched from sources
    max_final_results: int = 10  # results returned after reranking

    @property
    def llm_api_key(self) -> str:
        """Get the appropriate API key based on provider.

        Returns an empty string (never None) when the selected provider's
        key is unset, so callers can simply test truthiness.
        """
        if self.llm_provider == "groq":
            return self.groq_api_key or ""
        return self.openrouter_api_key or ""
47
+
48
+
49
@lru_cache
def get_settings() -> Settings:
    """Return the process-wide Settings singleton.

    ``lru_cache`` on a zero-argument function makes this a cheap memoized
    accessor: the environment/.env file is read once, on first call.
    """
    loaded = Settings()
    return loaded
app/main.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Lancer API - Main FastAPI application."""
2
+
3
+ from contextlib import asynccontextmanager
4
+ from datetime import datetime
5
+
6
+ from fastapi import FastAPI
7
+ from fastapi.middleware.cors import CORSMiddleware
8
+
9
+ from app.api.routes import search
10
+ from app.config import get_settings
11
+
12
+
13
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan events.

    Logs the active LLM configuration at startup (useful for quickly
    diagnosing misconfigured deployments) and a notice at shutdown.
    """
    # Startup
    settings = get_settings()
    # Plain literal: the old f-string had no placeholders (lint F541).
    print("🚀 Lancer API starting...")
    print(f"   LLM Provider: {settings.llm_provider}")
    print(f"   LLM Model: {settings.llm_model}")
    yield
    # Shutdown
    print("👋 Lancer API shutting down...")
24
+
25
+
26
# Application object; startup/shutdown logging is handled by `lifespan`.
app = FastAPI(
    title="Lancer Search API",
    description="Advanced AI-powered search API with temporal intelligence",
    version="0.1.0",
    lifespan=lifespan,
)

# CORS middleware
# NOTE(review): allow_origins=["*"] combined with allow_credentials=True
# conflicts with the CORS spec — browsers will not accept a wildcard origin
# on credentialed requests. Tighten origins or drop credentials before
# production.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Include routers — all search endpoints are versioned under /api/v1.
app.include_router(search.router, prefix="/api/v1", tags=["search"])
44
+
45
+
46
@app.get("/health")
async def health_check():
    """Liveness probe: reports service status, current time, and version."""
    status_report = {
        "status": "healthy",
        "timestamp": datetime.now().isoformat(),
        "version": "0.1.0",
    }
    return status_report
54
+
55
+
56
@app.get("/")
async def root():
    """Landing endpoint pointing clients at the docs and health probe."""
    service_info = {
        "name": "Lancer Search API",
        "version": "0.1.0",
        "docs": "/docs",
        "health": "/health",
    }
    return service_info
app/reranking/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Reranking module."""
app/reranking/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (187 Bytes). View file
 
app/reranking/__pycache__/authority_scorer.cpython-311.pyc ADDED
Binary file (4.48 kB). View file
 
app/reranking/__pycache__/pipeline.cpython-311.pyc ADDED
Binary file (3.55 kB). View file
 
app/reranking/authority_scorer.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Domain authority scoring.
2
+
3
+ Assigns trust/authority scores to domains based on known reliable sources.
4
+ """
5
+
6
+ from urllib.parse import urlparse
7
+
8
+
9
+ # High authority domains (trusted sources)
10
# High authority domains (trusted sources).
# Keys starting with "." are TLD/suffix rules; all others are exact domains
# that also match their subdomains (e.g. "gist.github.com" -> "github.com").
HIGH_AUTHORITY_DOMAINS = {
    # Academic & Research
    ".edu": 0.9,
    ".gov": 0.9,
    ".ac.uk": 0.85,

    # Major tech companies
    "github.com": 0.8,
    "stackoverflow.com": 0.8,
    "docs.python.org": 0.85,
    "developer.mozilla.org": 0.85,
    "arxiv.org": 0.9,

    # Major news sources
    "reuters.com": 0.8,
    "bbc.com": 0.75,
    "nytimes.com": 0.75,
    "theguardian.com": 0.75,

    # Reference
    "wikipedia.org": 0.7,
    "britannica.com": 0.8,

    # AI/ML specific
    "openai.com": 0.85,
    "anthropic.com": 0.85,
    "huggingface.co": 0.8,
    "deepmind.google": 0.85,
    "ai.meta.com": 0.8,

    # Tech publications
    "techcrunch.com": 0.7,
    "wired.com": 0.7,
    "arstechnica.com": 0.75,
    "theverge.com": 0.7,
}

# Low authority patterns (less reliable, largely user-generated content)
LOW_AUTHORITY_PATTERNS = [
    "medium.com",  # User-generated, variable quality
    "reddit.com",  # Forum, variable quality
    "quora.com",  # Q&A, variable quality
    "blogspot.com",
    "wordpress.com",
    "tumblr.com",
]


def calculate_authority_score(url: str) -> float:
    """
    Calculate domain authority score for a URL.

    Args:
        url: The URL to score

    Returns:
        Authority score between 0.0 and 1.0 (0.5 = neutral/unknown)
    """
    if not url:
        return 0.5

    try:
        domain = urlparse(url).netloc.lower()

        # Remove www. prefix
        if domain.startswith("www."):
            domain = domain[4:]

        # Known domains: match the domain itself or any of its subdomains.
        # FIX: the previous bare endswith() check matched unrelated domains
        # that merely shared a suffix (e.g. "notgithub.com" scored as
        # "github.com"); subdomain matches now require a "." boundary.
        # This single pass also replaces the old second TLD loop, which was
        # dead code (the first loop already matched ".edu"/".gov" suffixes).
        for known_domain, score in HIGH_AUTHORITY_DOMAINS.items():
            if known_domain.startswith("."):
                # TLD/suffix rule (.edu, .gov, .ac.uk)
                if domain.endswith(known_domain):
                    return score
            elif domain == known_domain or domain.endswith("." + known_domain):
                return score

        # Check for low authority patterns
        for pattern in LOW_AUTHORITY_PATTERNS:
            if pattern in domain:
                return 0.4

        # Default score for unknown domains
        return 0.5

    except Exception:
        # Malformed input / urlparse failure: fall back to neutral.
        return 0.5
99
+
100
+
101
def get_domain_category(url: str) -> str:
    """
    Get a category label for the domain.

    Args:
        url: The URL to categorize

    Returns:
        Category string like "Academic", "News", "Tech", etc.
    """
    if not url:
        return "Unknown"

    try:
        host = urlparse(url).netloc.lower()

        # Ordered substring rules: the first matching bucket wins.
        if any(marker in host for marker in (".edu", ".ac.uk", "arxiv")):
            return "Academic"
        if ".gov" in host:
            return "Government"
        if any(marker in host for marker in ("github", "stackoverflow", "docs.")):
            return "Developer"
        if any(marker in host for marker in ("reuters", "bbc", "nytimes", "cnn", "guardian")):
            return "News"
        if any(marker in host for marker in ("openai", "anthropic", "huggingface", "deepmind")):
            return "AI/ML"
        if "wikipedia" in host:
            return "Reference"
        return "General"

    except Exception:
        return "Unknown"
app/reranking/pipeline.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Multi-stage reranking pipeline.
2
+
3
+ Implements a 3-stage reranking approach:
4
+ 1. Bi-Encoder: Fast semantic similarity (optional, for large result sets)
5
+ 2. Cross-Encoder: Accurate relevance scoring
6
+ 3. Temporal + Authority: Freshness and domain trust weighting
7
+ """
8
+
9
+ from typing import Optional
10
+
11
+ from app.temporal.freshness_scorer import calculate_freshness_score, adjust_score_by_freshness
12
+ from app.reranking.authority_scorer import calculate_authority_score
13
+
14
+
15
async def rerank_results(
    query: str,
    results: list[dict],
    temporal_urgency: float = 0.5,
    max_results: int = 10,
) -> list[dict]:
    """
    Apply multi-stage reranking to search results.

    MVP pipeline: annotate each result in place with freshness and
    authority scores, blend them into the source relevance score, then
    sort best-first. The bi-encoder and cross-encoder stages are
    deliberately skipped for now.

    Args:
        query: Original search query (unused by the MVP scoring stages)
        results: Raw search results (mutated: score keys are added/updated)
        temporal_urgency: How important freshness is (0-1)
        max_results: Maximum results to return

    Returns:
        Reranked results with updated scores
    """
    if not results:
        return []

    # Stages 1 & 2 (bi-encoder filtering, cross-encoder scoring) are
    # skipped in the MVP; see rerank_with_embeddings for the planned path.

    # Stage 3: temporal + authority scoring.
    for entry in results:
        freshness = calculate_freshness_score(entry.get("published_date"))
        authority = calculate_authority_score(entry.get("url", ""))
        entry["freshness_score"] = freshness
        entry["authority_score"] = authority

        # Blend freshness into the source relevance, weighted by urgency...
        blended = adjust_score_by_freshness(
            base_score=entry.get("score", 0.5),
            freshness_score=freshness,
            temporal_urgency=temporal_urgency,
        )
        # ...then give domain authority a fixed 10% weight.
        entry["score"] = (blended * 0.9) + (authority * 0.1)

    # Best-first ordering, truncated to the requested size.
    ranked = sorted(results, key=lambda e: e["score"], reverse=True)
    return ranked[:max_results]
81
+
82
+
83
async def rerank_with_embeddings(
    query: str,
    results: list[dict],
    max_results: int = 10,
) -> list[dict]:
    """
    Full reranking with embedding models (placeholder).

    TODO: Implement when adding sentence-transformers support:
    1. Use bi-encoder for fast filtering
    2. Use cross-encoder for precise scoring

    Until then this simply orders results by their existing source score,
    treating a missing "score" key as 0.
    """
    by_score_desc = sorted(results, key=lambda entry: entry.get("score", 0), reverse=True)
    return by_score_desc[:max_results]
app/sources/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Search sources module."""
app/sources/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (190 Bytes). View file
 
app/sources/__pycache__/duckduckgo.cpython-311.pyc ADDED
Binary file (3.69 kB). View file
 
app/sources/__pycache__/tavily.cpython-311.pyc ADDED
Binary file (4.15 kB). View file
 
app/sources/duckduckgo.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """DuckDuckGo search source (free fallback).
2
+
3
+ Uses the duckduckgo_search library for free web search.
4
+ """
5
+
6
+ from datetime import datetime, timedelta
7
+ from typing import Optional
8
+
9
+ import httpx
10
+
11
+
12
async def search_duckduckgo(
    query: str,
    max_results: int = 10,
    region: str = "wt-wt",  # Worldwide
) -> list[dict]:
    """
    Search using DuckDuckGo (free, no API key required).

    Fallback source for when other providers are unavailable. Fetches the
    lightweight DuckDuckGo Lite HTML page (no JavaScript) and scrapes it.

    Args:
        query: Search query
        max_results: Maximum results to return
        region: Region code

    Returns:
        List of result dicts with title, url, content
    """
    query_params = {
        "q": query,
        "kl": region,
        "kp": "-1",  # Safe search off
    }
    request_headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
    }

    try:
        async with httpx.AsyncClient(timeout=15.0) as client:
            # DuckDuckGo Lite serves plain HTML, which keeps parsing simple.
            response = await client.get(
                "https://lite.duckduckgo.com/lite/",
                params=query_params,
                headers=request_headers,
                follow_redirects=True,
            )
            response.raise_for_status()
            page_html = response.text

        return parse_ddg_lite_results(page_html, max_results)

    except Exception as e:
        # Best-effort fallback source: log and return nothing on any failure.
        print(f"DuckDuckGo search error: {e}")
        return []
61
+
62
+
63
def parse_ddg_lite_results(html: str, max_results: int) -> list[dict]:
    """
    Parse DuckDuckGo Lite HTML results.

    Scrapes anchors carrying class="result-link" and pairs each, by
    position, with the class="result-snippet" table cells.

    Args:
        html: Raw HTML of the DuckDuckGo Lite results page
        max_results: Maximum number of results to return

    Returns:
        List of result dicts (title, url, content, ...) ready for reranking
    """
    import re
    from html import unescape

    results: list[dict] = []

    # Match whole result anchors, capturing the attribute blob and the text.
    # FIX: the previous pattern required `class=` to appear *before* `href=`,
    # but DDG Lite emits <a rel="nofollow" href="..." class='result-link'>
    # (href first), so it never matched anything. Matching the attributes
    # order-independently fixes the parser.
    anchor_pattern = r"<a\b([^>]*)>([^<]+)</a>"
    href_pattern = r'href=["\']([^"\']+)["\']'

    # Snippets live in <td class="result-snippet"> cells, one per result.
    snippet_pattern = r'<td[^>]*class=["\']result-snippet["\'][^>]*>([^<]+)</td>'
    snippets = re.findall(snippet_pattern, html, re.IGNORECASE)

    links: list[tuple[str, str]] = []
    for attrs, text in re.findall(anchor_pattern, html, re.IGNORECASE):
        if "result-link" not in attrs:
            continue
        href_match = re.search(href_pattern, attrs, re.IGNORECASE)
        if href_match:
            links.append((href_match.group(1), text))

    for i, (url, title) in enumerate(links[:max_results]):
        content = snippets[i] if i < len(snippets) else ""

        # Clean up whitespace and decode HTML entities (&amp; -> &, etc.).
        # FIX: entities were previously left encoded despite the comment.
        title = unescape(title.strip())
        content = unescape(content.strip())

        # Skip DuckDuckGo internal links
        if "duckduckgo.com" in url:
            continue

        results.append({
            "title": title,
            "url": url,
            "content": content,
            "published_date": None,  # DDG Lite doesn't provide dates
            "score": 0.5,  # Neutral score, will be reranked
            "source": "duckduckgo",
        })

    return results[:max_results]
app/sources/tavily.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Tavily search source integration.
2
+
3
+ Tavily provides high-quality, AI-optimized search results.
4
+ """
5
+
6
+ from datetime import datetime
7
+ from typing import Literal, Optional
8
+
9
+ import httpx
10
+
11
+ from app.config import get_settings
12
+
13
+
14
async def search_tavily(
    query: str,
    max_results: int = 10,
    freshness: Literal["day", "week", "month", "year", "any"] = "any",
    include_domains: Optional[list[str]] = None,
    exclude_domains: Optional[list[str]] = None,
    search_depth: Literal["basic", "advanced"] = "advanced",
) -> list[dict]:
    """
    Search using Tavily API.

    Builds the request payload, POSTs it to Tavily, and normalizes the
    response into the internal result-dict shape. Returns an empty list
    when no API key is configured or on any request error.

    Args:
        query: Search query
        max_results: Maximum results to return
        freshness: Filter by recency
        include_domains: Only include these domains
        exclude_domains: Exclude these domains
        search_depth: "basic" (fast) or "advanced" (thorough)

    Returns:
        List of result dicts with title, url, content, published_date, score
    """
    settings = get_settings()
    if not settings.tavily_api_key:
        return []

    # Tavily expresses recency as a look-back window in days; None = no filter.
    recency_days = {"day": 1, "week": 7, "month": 30, "year": 365, "any": None}.get(freshness)

    payload: dict = {
        "api_key": settings.tavily_api_key,
        "query": query,
        "search_depth": search_depth,
        "max_results": max_results,
        "include_answer": False,
        "include_raw_content": False,
    }
    if recency_days:
        payload["days"] = recency_days
    if include_domains:
        payload["include_domains"] = include_domains
    if exclude_domains:
        payload["exclude_domains"] = exclude_domains

    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.post(
                "https://api.tavily.com/search",
                json=payload,
            )
            response.raise_for_status()
            data = response.json()

        normalized = []
        for item in data.get("results", []):
            # Tavily dates are ISO 8601, sometimes with a trailing "Z".
            published = None
            raw_date = item.get("published_date")
            if raw_date:
                try:
                    published = datetime.fromisoformat(raw_date.replace("Z", "+00:00"))
                except (ValueError, TypeError):
                    published = None

            normalized.append({
                "title": item.get("title", ""),
                "url": item.get("url", ""),
                "content": item.get("content", ""),
                "published_date": published,
                "score": item.get("score", 0.5),
                "source": "tavily",
            })

        return normalized

    except httpx.HTTPError as e:
        print(f"Tavily search error: {e}")
        return []
    except Exception as e:
        print(f"Tavily unexpected error: {e}")
        return []
app/temporal/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Temporal intelligence module."""
app/temporal/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (198 Bytes). View file
 
app/temporal/__pycache__/freshness_scorer.cpython-311.pyc ADDED
Binary file (3.81 kB). View file
 
app/temporal/__pycache__/intent_detector.cpython-311.pyc ADDED
Binary file (3.01 kB). View file