Upload 53 files
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .env.example +11 -0
- .gitattributes +5 -0
- .gitignore +45 -0
- .venv/.gitignore +1 -0
- .venv/bin/Activate.ps1 +247 -0
- .venv/bin/activate +63 -0
- .venv/bin/python.exe +3 -0
- .venv/bin/python3.11.exe +3 -0
- .venv/bin/python3.exe +3 -0
- .venv/bin/python3w.exe +3 -0
- .venv/bin/pythonw.exe +3 -0
- .venv/pyvenv.cfg +5 -0
- Dockerfile +21 -0
- README.md +54 -11
- app/__init__.py +3 -0
- app/__pycache__/__init__.cpython-311.pyc +0 -0
- app/__pycache__/config.cpython-311.pyc +0 -0
- app/__pycache__/main.cpython-311.pyc +0 -0
- app/agents/__init__.py +1 -0
- app/agents/__pycache__/__init__.cpython-311.pyc +0 -0
- app/agents/__pycache__/llm_client.cpython-311.pyc +0 -0
- app/agents/__pycache__/synthesizer.cpython-311.pyc +0 -0
- app/agents/llm_client.py +105 -0
- app/agents/synthesizer.py +127 -0
- app/api/__init__.py +1 -0
- app/api/__pycache__/__init__.cpython-311.pyc +0 -0
- app/api/__pycache__/schemas.cpython-311.pyc +0 -0
- app/api/routes/__init__.py +1 -0
- app/api/routes/__pycache__/__init__.cpython-311.pyc +0 -0
- app/api/routes/__pycache__/search.cpython-311.pyc +0 -0
- app/api/routes/search.py +146 -0
- app/api/schemas.py +112 -0
- app/config.py +52 -0
- app/main.py +64 -0
- app/reranking/__init__.py +1 -0
- app/reranking/__pycache__/__init__.cpython-311.pyc +0 -0
- app/reranking/__pycache__/authority_scorer.cpython-311.pyc +0 -0
- app/reranking/__pycache__/pipeline.cpython-311.pyc +0 -0
- app/reranking/authority_scorer.py +134 -0
- app/reranking/pipeline.py +99 -0
- app/sources/__init__.py +1 -0
- app/sources/__pycache__/__init__.cpython-311.pyc +0 -0
- app/sources/__pycache__/duckduckgo.cpython-311.pyc +0 -0
- app/sources/__pycache__/tavily.cpython-311.pyc +0 -0
- app/sources/duckduckgo.py +103 -0
- app/sources/tavily.py +106 -0
- app/temporal/__init__.py +1 -0
- app/temporal/__pycache__/__init__.cpython-311.pyc +0 -0
- app/temporal/__pycache__/freshness_scorer.cpython-311.pyc +0 -0
- app/temporal/__pycache__/intent_detector.cpython-311.pyc +0 -0
.env.example
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# LLM Providers (choose one or both)
|
| 2 |
+
GROQ_API_KEY=gsk_your_groq_key
|
| 3 |
+
OPENROUTER_API_KEY=sk-or-your_openrouter_key
|
| 4 |
+
|
| 5 |
+
# Search Sources
|
| 6 |
+
TAVILY_API_KEY=tvly-your_tavily_key
|
| 7 |
+
SERPER_API_KEY=your_serper_key # Optional
|
| 8 |
+
|
| 9 |
+
# Configuration
|
| 10 |
+
LLM_PROVIDER=groq # or "openrouter"
|
| 11 |
+
LLM_MODEL=llama-3.3-70b-versatile
|
.gitattributes
CHANGED
|
@@ -33,3 +33,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
.venv/bin/python.exe filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
.venv/bin/python3.11.exe filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
.venv/bin/python3.exe filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
.venv/bin/python3w.exe filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
.venv/bin/pythonw.exe filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# HuggingFace Spaces files
|
| 2 |
+
*.hf
|
| 3 |
+
.hf
|
| 4 |
+
|
| 5 |
+
# Python
|
| 6 |
+
__pycache__/
|
| 7 |
+
*.py[cod]
|
| 8 |
+
*$py.class
|
| 9 |
+
*.so
|
| 10 |
+
.Python
|
| 11 |
+
build/
|
| 12 |
+
develop-eggs/
|
| 13 |
+
dist/
|
| 14 |
+
downloads/
|
| 15 |
+
eggs/
|
| 16 |
+
.eggs/
|
| 17 |
+
lib/
|
| 18 |
+
lib64/
|
| 19 |
+
parts/
|
| 20 |
+
sdist/
|
| 21 |
+
var/
|
| 22 |
+
wheels/
|
| 23 |
+
*.egg-info/
|
| 24 |
+
.installed.cfg
|
| 25 |
+
*.egg
|
| 26 |
+
|
| 27 |
+
# Virtual environments
|
| 28 |
+
.env
|
| 29 |
+
.venv/
|
| 30 |
+
venv/
|
| 31 |
+
ENV/
|
| 32 |
+
|
| 33 |
+
# IDE
|
| 34 |
+
.idea/
|
| 35 |
+
.vscode/
|
| 36 |
+
*.swp
|
| 37 |
+
*.swo
|
| 38 |
+
|
| 39 |
+
# OS
|
| 40 |
+
.DS_Store
|
| 41 |
+
Thumbs.db
|
| 42 |
+
|
| 43 |
+
# Local development
|
| 44 |
+
*.log
|
| 45 |
+
.cache/
|
.venv/.gitignore
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
*
|
.venv/bin/Activate.ps1
ADDED
|
@@ -0,0 +1,247 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<#
|
| 2 |
+
.Synopsis
|
| 3 |
+
Activate a Python virtual environment for the current PowerShell session.
|
| 4 |
+
|
| 5 |
+
.Description
|
| 6 |
+
Pushes the python executable for a virtual environment to the front of the
|
| 7 |
+
$Env:PATH environment variable and sets the prompt to signify that you are
|
| 8 |
+
in a Python virtual environment. Makes use of the command line switches as
|
| 9 |
+
well as the `pyvenv.cfg` file values present in the virtual environment.
|
| 10 |
+
|
| 11 |
+
.Parameter VenvDir
|
| 12 |
+
Path to the directory that contains the virtual environment to activate. The
|
| 13 |
+
default value for this is the parent of the directory that the Activate.ps1
|
| 14 |
+
script is located within.
|
| 15 |
+
|
| 16 |
+
.Parameter Prompt
|
| 17 |
+
The prompt prefix to display when this virtual environment is activated. By
|
| 18 |
+
default, this prompt is the name of the virtual environment folder (VenvDir)
|
| 19 |
+
surrounded by parentheses and followed by a single space (ie. '(.venv) ').
|
| 20 |
+
|
| 21 |
+
.Example
|
| 22 |
+
Activate.ps1
|
| 23 |
+
Activates the Python virtual environment that contains the Activate.ps1 script.
|
| 24 |
+
|
| 25 |
+
.Example
|
| 26 |
+
Activate.ps1 -Verbose
|
| 27 |
+
Activates the Python virtual environment that contains the Activate.ps1 script,
|
| 28 |
+
and shows extra information about the activation as it executes.
|
| 29 |
+
|
| 30 |
+
.Example
|
| 31 |
+
Activate.ps1 -VenvDir C:\Users\MyUser\Common\.venv
|
| 32 |
+
Activates the Python virtual environment located in the specified location.
|
| 33 |
+
|
| 34 |
+
.Example
|
| 35 |
+
Activate.ps1 -Prompt "MyPython"
|
| 36 |
+
Activates the Python virtual environment that contains the Activate.ps1 script,
|
| 37 |
+
and prefixes the current prompt with the specified string (surrounded in
|
| 38 |
+
parentheses) while the virtual environment is active.
|
| 39 |
+
|
| 40 |
+
.Notes
|
| 41 |
+
On Windows, it may be required to enable this Activate.ps1 script by setting the
|
| 42 |
+
execution policy for the user. You can do this by issuing the following PowerShell
|
| 43 |
+
command:
|
| 44 |
+
|
| 45 |
+
PS C:\> Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope CurrentUser
|
| 46 |
+
|
| 47 |
+
For more information on Execution Policies:
|
| 48 |
+
https://go.microsoft.com/fwlink/?LinkID=135170
|
| 49 |
+
|
| 50 |
+
#>
|
| 51 |
+
Param(
|
| 52 |
+
[Parameter(Mandatory = $false)]
|
| 53 |
+
[String]
|
| 54 |
+
$VenvDir,
|
| 55 |
+
[Parameter(Mandatory = $false)]
|
| 56 |
+
[String]
|
| 57 |
+
$Prompt
|
| 58 |
+
)
|
| 59 |
+
|
| 60 |
+
<# Function declarations --------------------------------------------------- #>
|
| 61 |
+
|
| 62 |
+
<#
|
| 63 |
+
.Synopsis
|
| 64 |
+
Remove all shell session elements added by the Activate script, including the
|
| 65 |
+
addition of the virtual environment's Python executable from the beginning of
|
| 66 |
+
the PATH variable.
|
| 67 |
+
|
| 68 |
+
.Parameter NonDestructive
|
| 69 |
+
If present, do not remove this function from the global namespace for the
|
| 70 |
+
session.
|
| 71 |
+
|
| 72 |
+
#>
|
| 73 |
+
function global:deactivate ([switch]$NonDestructive) {
|
| 74 |
+
# Revert to original values
|
| 75 |
+
|
| 76 |
+
# The prior prompt:
|
| 77 |
+
if (Test-Path -Path Function:_OLD_VIRTUAL_PROMPT) {
|
| 78 |
+
Copy-Item -Path Function:_OLD_VIRTUAL_PROMPT -Destination Function:prompt
|
| 79 |
+
Remove-Item -Path Function:_OLD_VIRTUAL_PROMPT
|
| 80 |
+
}
|
| 81 |
+
|
| 82 |
+
# The prior PYTHONHOME:
|
| 83 |
+
if (Test-Path -Path Env:_OLD_VIRTUAL_PYTHONHOME) {
|
| 84 |
+
Copy-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME -Destination Env:PYTHONHOME
|
| 85 |
+
Remove-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME
|
| 86 |
+
}
|
| 87 |
+
|
| 88 |
+
# The prior PATH:
|
| 89 |
+
if (Test-Path -Path Env:_OLD_VIRTUAL_PATH) {
|
| 90 |
+
Copy-Item -Path Env:_OLD_VIRTUAL_PATH -Destination Env:PATH
|
| 91 |
+
Remove-Item -Path Env:_OLD_VIRTUAL_PATH
|
| 92 |
+
}
|
| 93 |
+
|
| 94 |
+
# Just remove the VIRTUAL_ENV altogether:
|
| 95 |
+
if (Test-Path -Path Env:VIRTUAL_ENV) {
|
| 96 |
+
Remove-Item -Path env:VIRTUAL_ENV
|
| 97 |
+
}
|
| 98 |
+
|
| 99 |
+
# Just remove VIRTUAL_ENV_PROMPT altogether.
|
| 100 |
+
if (Test-Path -Path Env:VIRTUAL_ENV_PROMPT) {
|
| 101 |
+
Remove-Item -Path env:VIRTUAL_ENV_PROMPT
|
| 102 |
+
}
|
| 103 |
+
|
| 104 |
+
# Just remove the _PYTHON_VENV_PROMPT_PREFIX altogether:
|
| 105 |
+
if (Get-Variable -Name "_PYTHON_VENV_PROMPT_PREFIX" -ErrorAction SilentlyContinue) {
|
| 106 |
+
Remove-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Scope Global -Force
|
| 107 |
+
}
|
| 108 |
+
|
| 109 |
+
# Leave deactivate function in the global namespace if requested:
|
| 110 |
+
if (-not $NonDestructive) {
|
| 111 |
+
Remove-Item -Path function:deactivate
|
| 112 |
+
}
|
| 113 |
+
}
|
| 114 |
+
|
| 115 |
+
<#
|
| 116 |
+
.Description
|
| 117 |
+
Get-PyVenvConfig parses the values from the pyvenv.cfg file located in the
|
| 118 |
+
given folder, and returns them in a map.
|
| 119 |
+
|
| 120 |
+
For each line in the pyvenv.cfg file, if that line can be parsed into exactly
|
| 121 |
+
two strings separated by `=` (with any amount of whitespace surrounding the =)
|
| 122 |
+
then it is considered a `key = value` line. The left hand string is the key,
|
| 123 |
+
the right hand is the value.
|
| 124 |
+
|
| 125 |
+
If the value starts with a `'` or a `"` then the first and last character is
|
| 126 |
+
stripped from the value before being captured.
|
| 127 |
+
|
| 128 |
+
.Parameter ConfigDir
|
| 129 |
+
Path to the directory that contains the `pyvenv.cfg` file.
|
| 130 |
+
#>
|
| 131 |
+
function Get-PyVenvConfig(
|
| 132 |
+
[String]
|
| 133 |
+
$ConfigDir
|
| 134 |
+
) {
|
| 135 |
+
Write-Verbose "Given ConfigDir=$ConfigDir, obtain values in pyvenv.cfg"
|
| 136 |
+
|
| 137 |
+
# Ensure the file exists, and issue a warning if it doesn't (but still allow the function to continue).
|
| 138 |
+
$pyvenvConfigPath = Join-Path -Resolve -Path $ConfigDir -ChildPath 'pyvenv.cfg' -ErrorAction Continue
|
| 139 |
+
|
| 140 |
+
# An empty map will be returned if no config file is found.
|
| 141 |
+
$pyvenvConfig = @{ }
|
| 142 |
+
|
| 143 |
+
if ($pyvenvConfigPath) {
|
| 144 |
+
|
| 145 |
+
Write-Verbose "File exists, parse `key = value` lines"
|
| 146 |
+
$pyvenvConfigContent = Get-Content -Path $pyvenvConfigPath
|
| 147 |
+
|
| 148 |
+
$pyvenvConfigContent | ForEach-Object {
|
| 149 |
+
$keyval = $PSItem -split "\s*=\s*", 2
|
| 150 |
+
if ($keyval[0] -and $keyval[1]) {
|
| 151 |
+
$val = $keyval[1]
|
| 152 |
+
|
| 153 |
+
# Remove extraneous quotations around a string value.
|
| 154 |
+
if ("'""".Contains($val.Substring(0, 1))) {
|
| 155 |
+
$val = $val.Substring(1, $val.Length - 2)
|
| 156 |
+
}
|
| 157 |
+
|
| 158 |
+
$pyvenvConfig[$keyval[0]] = $val
|
| 159 |
+
Write-Verbose "Adding Key: '$($keyval[0])'='$val'"
|
| 160 |
+
}
|
| 161 |
+
}
|
| 162 |
+
}
|
| 163 |
+
return $pyvenvConfig
|
| 164 |
+
}
|
| 165 |
+
|
| 166 |
+
|
| 167 |
+
<# Begin Activate script --------------------------------------------------- #>
|
| 168 |
+
|
| 169 |
+
# Determine the containing directory of this script
|
| 170 |
+
$VenvExecPath = Split-Path -Parent $MyInvocation.MyCommand.Definition
|
| 171 |
+
$VenvExecDir = Get-Item -Path $VenvExecPath
|
| 172 |
+
|
| 173 |
+
Write-Verbose "Activation script is located in path: '$VenvExecPath'"
|
| 174 |
+
Write-Verbose "VenvExecDir Fullname: '$($VenvExecDir.FullName)"
|
| 175 |
+
Write-Verbose "VenvExecDir Name: '$($VenvExecDir.Name)"
|
| 176 |
+
|
| 177 |
+
# Set values required in priority: CmdLine, ConfigFile, Default
|
| 178 |
+
# First, get the location of the virtual environment, it might not be
|
| 179 |
+
# VenvExecDir if specified on the command line.
|
| 180 |
+
if ($VenvDir) {
|
| 181 |
+
Write-Verbose "VenvDir given as parameter, using '$VenvDir' to determine values"
|
| 182 |
+
}
|
| 183 |
+
else {
|
| 184 |
+
Write-Verbose "VenvDir not given as a parameter, using parent directory name as VenvDir."
|
| 185 |
+
$VenvDir = $VenvExecDir.Parent.FullName.TrimEnd("\\/")
|
| 186 |
+
Write-Verbose "VenvDir=$VenvDir"
|
| 187 |
+
}
|
| 188 |
+
|
| 189 |
+
# Next, read the `pyvenv.cfg` file to determine any required value such
|
| 190 |
+
# as `prompt`.
|
| 191 |
+
$pyvenvCfg = Get-PyVenvConfig -ConfigDir $VenvDir
|
| 192 |
+
|
| 193 |
+
# Next, set the prompt from the command line, or the config file, or
|
| 194 |
+
# just use the name of the virtual environment folder.
|
| 195 |
+
if ($Prompt) {
|
| 196 |
+
Write-Verbose "Prompt specified as argument, using '$Prompt'"
|
| 197 |
+
}
|
| 198 |
+
else {
|
| 199 |
+
Write-Verbose "Prompt not specified as argument to script, checking pyvenv.cfg value"
|
| 200 |
+
if ($pyvenvCfg -and $pyvenvCfg['prompt']) {
|
| 201 |
+
Write-Verbose " Setting based on value in pyvenv.cfg='$($pyvenvCfg['prompt'])'"
|
| 202 |
+
$Prompt = $pyvenvCfg['prompt'];
|
| 203 |
+
}
|
| 204 |
+
else {
|
| 205 |
+
Write-Verbose " Setting prompt based on parent's directory's name. (Is the directory name passed to venv module when creating the virtual environment)"
|
| 206 |
+
Write-Verbose " Got leaf-name of $VenvDir='$(Split-Path -Path $venvDir -Leaf)'"
|
| 207 |
+
$Prompt = Split-Path -Path $venvDir -Leaf
|
| 208 |
+
}
|
| 209 |
+
}
|
| 210 |
+
|
| 211 |
+
Write-Verbose "Prompt = '$Prompt'"
|
| 212 |
+
Write-Verbose "VenvDir='$VenvDir'"
|
| 213 |
+
|
| 214 |
+
# Deactivate any currently active virtual environment, but leave the
|
| 215 |
+
# deactivate function in place.
|
| 216 |
+
deactivate -nondestructive
|
| 217 |
+
|
| 218 |
+
# Now set the environment variable VIRTUAL_ENV, used by many tools to determine
|
| 219 |
+
# that there is an activated venv.
|
| 220 |
+
$env:VIRTUAL_ENV = $VenvDir
|
| 221 |
+
|
| 222 |
+
if (-not $Env:VIRTUAL_ENV_DISABLE_PROMPT) {
|
| 223 |
+
|
| 224 |
+
Write-Verbose "Setting prompt to '$Prompt'"
|
| 225 |
+
|
| 226 |
+
# Set the prompt to include the env name
|
| 227 |
+
# Make sure _OLD_VIRTUAL_PROMPT is global
|
| 228 |
+
function global:_OLD_VIRTUAL_PROMPT { "" }
|
| 229 |
+
Copy-Item -Path function:prompt -Destination function:_OLD_VIRTUAL_PROMPT
|
| 230 |
+
New-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Description "Python virtual environment prompt prefix" -Scope Global -Option ReadOnly -Visibility Public -Value $Prompt
|
| 231 |
+
|
| 232 |
+
function global:prompt {
|
| 233 |
+
Write-Host -NoNewline -ForegroundColor Green "($_PYTHON_VENV_PROMPT_PREFIX) "
|
| 234 |
+
_OLD_VIRTUAL_PROMPT
|
| 235 |
+
}
|
| 236 |
+
$env:VIRTUAL_ENV_PROMPT = $Prompt
|
| 237 |
+
}
|
| 238 |
+
|
| 239 |
+
# Clear PYTHONHOME
|
| 240 |
+
if (Test-Path -Path Env:PYTHONHOME) {
|
| 241 |
+
Copy-Item -Path Env:PYTHONHOME -Destination Env:_OLD_VIRTUAL_PYTHONHOME
|
| 242 |
+
Remove-Item -Path Env:PYTHONHOME
|
| 243 |
+
}
|
| 244 |
+
|
| 245 |
+
# Add the venv to the PATH
|
| 246 |
+
Copy-Item -Path Env:PATH -Destination Env:_OLD_VIRTUAL_PATH
|
| 247 |
+
$Env:PATH = "$VenvExecDir$([System.IO.Path]::PathSeparator)$Env:PATH"
|
.venv/bin/activate
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# This file must be used with "source bin/activate" *from bash*
|
| 2 |
+
# you cannot run it directly
|
| 3 |
+
|
| 4 |
+
deactivate () {
|
| 5 |
+
# reset old environment variables
|
| 6 |
+
if [ -n "${_OLD_VIRTUAL_PATH:-}" ] ; then
|
| 7 |
+
PATH="${_OLD_VIRTUAL_PATH:-}"
|
| 8 |
+
export PATH
|
| 9 |
+
unset _OLD_VIRTUAL_PATH
|
| 10 |
+
fi
|
| 11 |
+
if [ -n "${_OLD_VIRTUAL_PYTHONHOME:-}" ] ; then
|
| 12 |
+
PYTHONHOME="${_OLD_VIRTUAL_PYTHONHOME:-}"
|
| 13 |
+
export PYTHONHOME
|
| 14 |
+
unset _OLD_VIRTUAL_PYTHONHOME
|
| 15 |
+
fi
|
| 16 |
+
|
| 17 |
+
# Call hash to forget past commands. Without forgetting
|
| 18 |
+
# past commands the $PATH changes we made may not be respected
|
| 19 |
+
hash -r 2> /dev/null
|
| 20 |
+
|
| 21 |
+
if [ -n "${_OLD_VIRTUAL_PS1:-}" ] ; then
|
| 22 |
+
PS1="${_OLD_VIRTUAL_PS1:-}"
|
| 23 |
+
export PS1
|
| 24 |
+
unset _OLD_VIRTUAL_PS1
|
| 25 |
+
fi
|
| 26 |
+
|
| 27 |
+
unset VIRTUAL_ENV
|
| 28 |
+
unset VIRTUAL_ENV_PROMPT
|
| 29 |
+
if [ ! "${1:-}" = "nondestructive" ] ; then
|
| 30 |
+
# Self destruct!
|
| 31 |
+
unset -f deactivate
|
| 32 |
+
fi
|
| 33 |
+
}
|
| 34 |
+
|
| 35 |
+
# unset irrelevant variables
|
| 36 |
+
deactivate nondestructive
|
| 37 |
+
|
| 38 |
+
VIRTUAL_ENV=$(cygpath "C:\Users\gabri\Lancer\.venv")
|
| 39 |
+
export VIRTUAL_ENV
|
| 40 |
+
|
| 41 |
+
_OLD_VIRTUAL_PATH="$PATH"
|
| 42 |
+
PATH="$VIRTUAL_ENV/bin:$PATH"
|
| 43 |
+
export PATH
|
| 44 |
+
|
| 45 |
+
# unset PYTHONHOME if set
|
| 46 |
+
# this will fail if PYTHONHOME is set to the empty string (which is bad anyway)
|
| 47 |
+
# could use `if (set -u; : $PYTHONHOME) ;` in bash
|
| 48 |
+
if [ -n "${PYTHONHOME:-}" ] ; then
|
| 49 |
+
_OLD_VIRTUAL_PYTHONHOME="${PYTHONHOME:-}"
|
| 50 |
+
unset PYTHONHOME
|
| 51 |
+
fi
|
| 52 |
+
|
| 53 |
+
if [ -z "${VIRTUAL_ENV_DISABLE_PROMPT:-}" ] ; then
|
| 54 |
+
_OLD_VIRTUAL_PS1="${PS1:-}"
|
| 55 |
+
PS1="(.venv) ${PS1:-}"
|
| 56 |
+
export PS1
|
| 57 |
+
VIRTUAL_ENV_PROMPT="(.venv) "
|
| 58 |
+
export VIRTUAL_ENV_PROMPT
|
| 59 |
+
fi
|
| 60 |
+
|
| 61 |
+
# Call hash to forget past commands. Without forgetting
|
| 62 |
+
# past commands the $PATH changes we made may not be respected
|
| 63 |
+
hash -r 2> /dev/null
|
.venv/bin/python.exe
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9af09b8342333dd7ac86931f8542366d4cd8e733993e8442d7abe025dcffbfce
|
| 3 |
+
size 138549
|
.venv/bin/python3.11.exe
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9af09b8342333dd7ac86931f8542366d4cd8e733993e8442d7abe025dcffbfce
|
| 3 |
+
size 138549
|
.venv/bin/python3.exe
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9af09b8342333dd7ac86931f8542366d4cd8e733993e8442d7abe025dcffbfce
|
| 3 |
+
size 138549
|
.venv/bin/python3w.exe
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:852435742fd9f70e20c4f9f9c0472f79247543bd88d72f16a70410e0a8a7b1d7
|
| 3 |
+
size 112963
|
.venv/bin/pythonw.exe
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:852435742fd9f70e20c4f9f9c0472f79247543bd88d72f16a70410e0a8a7b1d7
|
| 3 |
+
size 112963
|
.venv/pyvenv.cfg
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
home = C:\Program Files\Inkscape\bin
|
| 2 |
+
include-system-site-packages = false
|
| 3 |
+
version = 3.11.10
|
| 4 |
+
executable = C:\Program Files\Inkscape\bin\python.exe
|
| 5 |
+
command = C:\Program Files\Inkscape\bin\python.exe -m venv --without-pip C:\Users\gabri\Lancer\.venv
|
Dockerfile
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.11-slim
|
| 2 |
+
|
| 3 |
+
WORKDIR /app
|
| 4 |
+
|
| 5 |
+
# Install system dependencies for torch
|
| 6 |
+
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 7 |
+
build-essential \
|
| 8 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 9 |
+
|
| 10 |
+
# Copy and install dependencies
|
| 11 |
+
COPY pyproject.toml .
|
| 12 |
+
RUN pip install --no-cache-dir -e .
|
| 13 |
+
|
| 14 |
+
# Copy application
|
| 15 |
+
COPY app/ ./app/
|
| 16 |
+
|
| 17 |
+
# HuggingFace Spaces uses port 7860
|
| 18 |
+
EXPOSE 7860
|
| 19 |
+
|
| 20 |
+
# Run with uvicorn
|
| 21 |
+
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
|
README.md
CHANGED
|
@@ -1,11 +1,54 @@
|
|
| 1 |
-
---
|
| 2 |
-
title: Lancer
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom: purple
|
| 5 |
-
colorTo:
|
| 6 |
-
sdk: docker
|
| 7 |
-
pinned: false
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: Lancer Search API
|
| 3 |
+
emoji: 🔍
|
| 4 |
+
colorFrom: purple
|
| 5 |
+
colorTo: blue
|
| 6 |
+
sdk: docker
|
| 7 |
+
pinned: false
|
| 8 |
+
---
|
| 9 |
+
|
| 10 |
+
# Lancer Search API
|
| 11 |
+
|
| 12 |
+
🔍 Advanced AI-powered search API with temporal intelligence.
|
| 13 |
+
|
| 14 |
+
## Features
|
| 15 |
+
|
| 16 |
+
- **Temporal Intelligence**: Understands when you need fresh vs historical info
|
| 17 |
+
- **Multi-Stage Reranking**: Freshness + Authority scoring
|
| 18 |
+
- **Multi-Source Search**: Tavily, DuckDuckGo
|
| 19 |
+
- **LLM Synthesis**: Groq or OpenRouter
|
| 20 |
+
|
| 21 |
+
## API Endpoints
|
| 22 |
+
|
| 23 |
+
```bash
|
| 24 |
+
# Search with synthesis
|
| 25 |
+
POST /api/v1/search
|
| 26 |
+
{
|
| 27 |
+
"query": "What is the latest GPT model?",
|
| 28 |
+
"max_results": 10,
|
| 29 |
+
"freshness": "week"
|
| 30 |
+
}
|
| 31 |
+
|
| 32 |
+
# Health check
|
| 33 |
+
GET /health
|
| 34 |
+
```
|
| 35 |
+
|
| 36 |
+
## Environment Variables
|
| 37 |
+
|
| 38 |
+
Configure these in HuggingFace Space Secrets:
|
| 39 |
+
|
| 40 |
+
| Variable | Required | Description |
|
| 41 |
+
|----------|----------|-------------|
|
| 42 |
+
| `GROQ_API_KEY` | Yes* | Groq API key |
|
| 43 |
+
| `OPENROUTER_API_KEY` | Yes* | OpenRouter API key |
|
| 44 |
+
| `TAVILY_API_KEY` | Yes | Tavily search API key |
|
| 45 |
+
| `LLM_PROVIDER` | No | "groq" or "openrouter" |
|
| 46 |
+
|
| 47 |
+
*At least one LLM provider key required
|
| 48 |
+
|
| 49 |
+
## Local Development
|
| 50 |
+
|
| 51 |
+
```bash
|
| 52 |
+
pip install -e .
|
| 53 |
+
uvicorn app.main:app --reload
|
| 54 |
+
```
|
app/__init__.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Lancer - Advanced AI Search API"""
|
| 2 |
+
|
| 3 |
+
__version__ = "0.1.0"
|
app/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (218 Bytes). View file
|
|
|
app/__pycache__/config.cpython-311.pyc
ADDED
|
Binary file (2.28 kB). View file
|
|
|
app/__pycache__/main.cpython-311.pyc
ADDED
|
Binary file (2.5 kB). View file
|
|
|
app/agents/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
"""Agents module."""
|
app/agents/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (181 Bytes). View file
|
|
|
app/agents/__pycache__/llm_client.cpython-311.pyc
ADDED
|
Binary file (4.42 kB). View file
|
|
|
app/agents/__pycache__/synthesizer.cpython-311.pyc
ADDED
|
Binary file (4.98 kB). View file
|
|
|
app/agents/llm_client.py
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""LLM client abstraction for multiple providers.
|
| 2 |
+
|
| 3 |
+
Supports Groq and OpenRouter for LLM inference.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import httpx
|
| 7 |
+
from typing import Optional
|
| 8 |
+
|
| 9 |
+
from app.config import get_settings
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
async def generate_completion(
|
| 13 |
+
messages: list[dict],
|
| 14 |
+
model: Optional[str] = None,
|
| 15 |
+
temperature: float = 0.3,
|
| 16 |
+
max_tokens: int = 2048,
|
| 17 |
+
) -> str:
|
| 18 |
+
"""
|
| 19 |
+
Generate a completion using the configured LLM provider.
|
| 20 |
+
|
| 21 |
+
Args:
|
| 22 |
+
messages: List of message dicts with 'role' and 'content'
|
| 23 |
+
model: Model override (uses settings default if None)
|
| 24 |
+
temperature: Sampling temperature
|
| 25 |
+
max_tokens: Maximum tokens to generate
|
| 26 |
+
|
| 27 |
+
Returns:
|
| 28 |
+
Generated text content
|
| 29 |
+
"""
|
| 30 |
+
settings = get_settings()
|
| 31 |
+
provider = settings.llm_provider
|
| 32 |
+
model = model or settings.llm_model
|
| 33 |
+
|
| 34 |
+
if provider == "groq":
|
| 35 |
+
return await _call_groq(messages, model, temperature, max_tokens)
|
| 36 |
+
elif provider == "openrouter":
|
| 37 |
+
return await _call_openrouter(messages, model, temperature, max_tokens)
|
| 38 |
+
else:
|
| 39 |
+
raise ValueError(f"Unknown LLM provider: {provider}")
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
async def _call_groq(
|
| 43 |
+
messages: list[dict],
|
| 44 |
+
model: str,
|
| 45 |
+
temperature: float,
|
| 46 |
+
max_tokens: int,
|
| 47 |
+
) -> str:
|
| 48 |
+
"""Call Groq API."""
|
| 49 |
+
settings = get_settings()
|
| 50 |
+
|
| 51 |
+
if not settings.groq_api_key:
|
| 52 |
+
raise ValueError("GROQ_API_KEY not configured")
|
| 53 |
+
|
| 54 |
+
async with httpx.AsyncClient(timeout=60.0) as client:
|
| 55 |
+
response = await client.post(
|
| 56 |
+
"https://api.groq.com/openai/v1/chat/completions",
|
| 57 |
+
headers={
|
| 58 |
+
"Authorization": f"Bearer {settings.groq_api_key}",
|
| 59 |
+
"Content-Type": "application/json",
|
| 60 |
+
},
|
| 61 |
+
json={
|
| 62 |
+
"model": model,
|
| 63 |
+
"messages": messages,
|
| 64 |
+
"temperature": temperature,
|
| 65 |
+
"max_tokens": max_tokens,
|
| 66 |
+
},
|
| 67 |
+
)
|
| 68 |
+
response.raise_for_status()
|
| 69 |
+
data = response.json()
|
| 70 |
+
|
| 71 |
+
return data["choices"][0]["message"]["content"]
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
async def _call_openrouter(
|
| 75 |
+
messages: list[dict],
|
| 76 |
+
model: str,
|
| 77 |
+
temperature: float,
|
| 78 |
+
max_tokens: int,
|
| 79 |
+
) -> str:
|
| 80 |
+
"""Call OpenRouter API."""
|
| 81 |
+
settings = get_settings()
|
| 82 |
+
|
| 83 |
+
if not settings.openrouter_api_key:
|
| 84 |
+
raise ValueError("OPENROUTER_API_KEY not configured")
|
| 85 |
+
|
| 86 |
+
async with httpx.AsyncClient(timeout=60.0) as client:
|
| 87 |
+
response = await client.post(
|
| 88 |
+
"https://openrouter.ai/api/v1/chat/completions",
|
| 89 |
+
headers={
|
| 90 |
+
"Authorization": f"Bearer {settings.openrouter_api_key}",
|
| 91 |
+
"Content-Type": "application/json",
|
| 92 |
+
"HTTP-Referer": "https://lancer-api.hf.space",
|
| 93 |
+
"X-Title": "Lancer Search API",
|
| 94 |
+
},
|
| 95 |
+
json={
|
| 96 |
+
"model": model,
|
| 97 |
+
"messages": messages,
|
| 98 |
+
"temperature": temperature,
|
| 99 |
+
"max_tokens": max_tokens,
|
| 100 |
+
},
|
| 101 |
+
)
|
| 102 |
+
response.raise_for_status()
|
| 103 |
+
data = response.json()
|
| 104 |
+
|
| 105 |
+
return data["choices"][0]["message"]["content"]
|
app/agents/synthesizer.py
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Answer synthesizer agent.
|
| 2 |
+
|
| 3 |
+
Generates a coherent answer from search results with citations.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
from datetime import datetime
|
| 7 |
+
from typing import Optional
|
| 8 |
+
|
| 9 |
+
from app.api.schemas import SearchResult, TemporalContext, Citation
|
| 10 |
+
from app.agents.llm_client import generate_completion
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
SYNTHESIS_PROMPT = """You are a research assistant that synthesizes information from search results.
|
| 14 |
+
|
| 15 |
+
CURRENT DATE: {current_date}
|
| 16 |
+
|
| 17 |
+
USER QUERY: {query}
|
| 18 |
+
|
| 19 |
+
TEMPORAL CONTEXT:
|
| 20 |
+
- Query intent: {temporal_intent} (the user {intent_explanation})
|
| 21 |
+
- Temporal urgency: {temporal_urgency:.0%} (how important freshness is)
|
| 22 |
+
|
| 23 |
+
SEARCH RESULTS:
|
| 24 |
+
{formatted_results}
|
| 25 |
+
|
| 26 |
+
INSTRUCTIONS:
|
| 27 |
+
1. Synthesize a comprehensive answer based on the search results
|
| 28 |
+
2. ALWAYS cite your sources using [1], [2], etc. format
|
| 29 |
+
3. If the query requires current information, prioritize the most recent results
|
| 30 |
+
4. If there are conflicting dates or versions mentioned, use the most recent accurate information
|
| 31 |
+
5. Be concise but thorough
|
| 32 |
+
6. If information seems outdated compared to current date ({current_date}), note this
|
| 33 |
+
7. Write in the same language as the query
|
| 34 |
+
|
| 35 |
+
Generate your answer:"""
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
async def synthesize_answer(
    query: str,
    results: list[SearchResult],
    temporal_context: Optional[TemporalContext] = None,
) -> tuple[str, list[Citation]]:
    """
    Synthesize an answer from search results.

    Args:
        query: Original search query
        results: List of search results to synthesize from
        temporal_context: Temporal analysis context

    Returns:
        Tuple of (answer_text, citations_list)
    """
    if not results:
        return "No results found to synthesize an answer.", []

    # Only the top 10 results are shown to the LLM and cited.
    top_results = results[:10]

    # Temporal defaults; overridden when an analysis context is supplied.
    if temporal_context is not None:
        intent = temporal_context.query_temporal_intent
        urgency = temporal_context.temporal_urgency
        today = temporal_context.current_date
    else:
        intent = "neutral"
        urgency = 0.5
        today = datetime.now().strftime("%Y-%m-%d")

    # Human-readable gloss of the detected intent, embedded in the prompt.
    explanations = {
        "current": "is looking for the most recent/current information",
        "historical": "is interested in historical or background information",
        "neutral": "has no specific temporal preference",
    }

    prompt = SYNTHESIS_PROMPT.format(
        current_date=today,
        query=query,
        temporal_intent=intent,
        intent_explanation=explanations.get(intent, ""),
        temporal_urgency=urgency,
        formatted_results=format_results_for_prompt(top_results),
    )

    messages = [
        {"role": "system", "content": "You are a helpful research assistant."},
        {"role": "user", "content": prompt},
    ]

    try:
        answer = await generate_completion(messages, temperature=0.3)
    except Exception as e:
        # Fallback: return a simple summary without LLM
        answer = f"Error generating synthesis: {e}. Please review the search results directly."

    # One 1-based citation per result fed to the LLM, matching its [n] markers.
    citations = [
        Citation(index=i, url=item.url, title=item.title)
        for i, item in enumerate(top_results, 1)
    ]

    return answer, citations
|
| 109 |
+
|
| 110 |
+
|
| 111 |
+
def format_results_for_prompt(results: list[SearchResult]) -> str:
    """Format search results for inclusion in the LLM prompt.

    Each result becomes a numbered entry (matching the [n] citation markers
    the synthesis prompt asks for) with title, optional publication date,
    URL, freshness/authority scores, and a content snippet capped at 500
    characters.

    Args:
        results: Results to render, in their final ranked order.

    Returns:
        One string with entries separated by blank lines ("" for no results).
    """
    formatted = []

    for i, result in enumerate(results, 1):
        date_str = ""
        if result.published_date:
            date_str = f" (Published: {result.published_date.strftime('%Y-%m-%d')})"

        # Cap the snippet at 500 chars.  Fix: only append "..." when content
        # was actually truncated — previously it was added unconditionally,
        # falsely implying truncation of short snippets.
        snippet = result.content[:500]
        if len(result.content) > 500:
            snippet += "..."

        formatted.append(
            f"[{i}] {result.title}{date_str}\n"
            f" URL: {result.url}\n"
            f" Freshness: {result.freshness_score:.0%} | Authority: {result.authority_score:.0%}\n"
            f" Content: {snippet}"
        )

    return "\n\n".join(formatted)
|
app/api/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
"""API routes package."""
|
app/api/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (183 Bytes). View file
|
|
|
app/api/__pycache__/schemas.cpython-311.pyc
ADDED
|
Binary file (5.92 kB). View file
|
|
|
app/api/routes/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
"""API routes package."""
|
app/api/routes/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (190 Bytes). View file
|
|
|
app/api/routes/__pycache__/search.cpython-311.pyc
ADDED
|
Binary file (5.54 kB). View file
|
|
|
app/api/routes/search.py
ADDED
|
@@ -0,0 +1,146 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Search API routes."""
|
| 2 |
+
|
| 3 |
+
import time
|
| 4 |
+
from datetime import datetime
|
| 5 |
+
|
| 6 |
+
from fastapi import APIRouter, HTTPException
|
| 7 |
+
|
| 8 |
+
from app.api.schemas import (
|
| 9 |
+
SearchRequest,
|
| 10 |
+
SearchResponse,
|
| 11 |
+
SearchResult,
|
| 12 |
+
TemporalContext,
|
| 13 |
+
Citation,
|
| 14 |
+
ErrorResponse,
|
| 15 |
+
)
|
| 16 |
+
from app.config import get_settings
|
| 17 |
+
from app.temporal.intent_detector import detect_temporal_intent
|
| 18 |
+
from app.temporal.freshness_scorer import calculate_freshness_score
|
| 19 |
+
from app.sources.tavily import search_tavily
|
| 20 |
+
from app.sources.duckduckgo import search_duckduckgo
|
| 21 |
+
from app.reranking.pipeline import rerank_results
|
| 22 |
+
from app.agents.synthesizer import synthesize_answer
|
| 23 |
+
|
| 24 |
+
router = APIRouter()
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
@router.post(
    "/search",
    response_model=SearchResponse,
    responses={500: {"model": ErrorResponse}},
    summary="Search with AI synthesis",
    description="Perform a search with temporal intelligence and return an AI-synthesized answer.",
)
async def search(request: SearchRequest) -> SearchResponse:
    """
    Perform an intelligent search with:
    - Temporal intent detection
    - Multi-source search
    - Multi-stage reranking
    - AI-powered answer synthesis

    Raises:
        HTTPException: 500 with the underlying error message on any
            unexpected pipeline failure.
    """
    start_time = time.perf_counter()
    settings = get_settings()

    try:
        # Step 1: Analyze temporal intent
        temporal_intent, temporal_urgency = detect_temporal_intent(request.query)

        temporal_context = TemporalContext(
            query_temporal_intent=temporal_intent,
            temporal_urgency=temporal_urgency,
            current_date=datetime.now().strftime("%Y-%m-%d"),
        )

        # Step 2: Search multiple sources
        raw_results = []

        # Try Tavily first (best quality); skipped when no API key is set.
        if settings.tavily_api_key:
            tavily_results = await search_tavily(
                query=request.query,
                max_results=settings.max_search_results,
                freshness=request.freshness,
                include_domains=request.include_domains,
                exclude_domains=request.exclude_domains,
            )
            raw_results.extend(tavily_results)

        # Fallback to DuckDuckGo when Tavily is unavailable or empty.
        if not raw_results:
            ddg_results = await search_duckduckgo(
                query=request.query,
                max_results=settings.max_search_results,
            )
            raw_results.extend(ddg_results)

        if not raw_results:
            # No source produced anything: return an empty but valid response.
            return SearchResponse(
                query=request.query,
                answer="No results found for your query.",
                results=[],
                citations=[],
                temporal_context=temporal_context,
                processing_time_ms=(time.perf_counter() - start_time) * 1000,
            )

        # Step 3: Apply multi-stage reranking
        ranked_results = await rerank_results(
            query=request.query,
            results=raw_results,
            temporal_urgency=temporal_urgency,
            max_results=request.max_results,
        )

        # Step 4: Convert raw result dicts to SearchResult models
        # (fix: dropped the unused enumerate() index from the original loop).
        search_results = []
        for result in ranked_results:
            freshness = calculate_freshness_score(result.get("published_date"))
            search_results.append(
                SearchResult(
                    title=result.get("title", ""),
                    url=result.get("url", ""),
                    content=result.get("content", ""),
                    score=result.get("score", 0.5),
                    published_date=result.get("published_date"),
                    freshness_score=freshness,
                    authority_score=result.get("authority_score", 0.5),
                )
            )

        # Step 5: Synthesize answer (if requested)
        answer = None
        citations = []

        if request.include_answer and search_results:
            answer, citations = await synthesize_answer(
                query=request.query,
                results=search_results,
                temporal_context=temporal_context,
            )

        processing_time = (time.perf_counter() - start_time) * 1000

        return SearchResponse(
            query=request.query,
            answer=answer,
            results=search_results,
            citations=citations,
            temporal_context=temporal_context,
            processing_time_ms=processing_time,
        )

    except HTTPException:
        # Fix: don't re-wrap deliberate HTTP errors as generic 500s.
        raise
    except Exception as e:
        # Fix: chain the original exception so tracebacks stay useful.
        # NOTE(review): str(e) may leak internal details to clients —
        # consider a generic message in production.
        raise HTTPException(status_code=500, detail=str(e)) from e
|
| 135 |
+
|
| 136 |
+
|
| 137 |
+
@router.post(
    "/search/raw",
    response_model=SearchResponse,
    summary="Search without synthesis",
    description="Perform a search and return raw results without AI synthesis (faster).",
)
async def search_raw(request: SearchRequest) -> SearchResponse:
    """Run the search pipeline with answer synthesis disabled (faster)."""
    raw_request = request.model_copy(update={"include_answer": False})
    return await search(raw_request)
|
app/api/schemas.py
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Pydantic schemas for API request/response models."""
|
| 2 |
+
|
| 3 |
+
from datetime import datetime
|
| 4 |
+
from typing import Literal
|
| 5 |
+
|
| 6 |
+
from pydantic import BaseModel, Field
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
# === Request Models ===
|
| 10 |
+
|
| 11 |
+
class SearchRequest(BaseModel):
    """Search request payload.

    Validation is enforced by pydantic: the query is length-bounded and
    max_results is clamped to 1-50.
    """

    query: str = Field(..., min_length=1, max_length=1000, description="Search query")
    max_results: int = Field(default=10, ge=1, le=50, description="Maximum results to return")
    # Recency window forwarded to sources that support date filtering (Tavily).
    freshness: Literal["day", "week", "month", "year", "any"] = Field(
        default="any",
        description="Filter results by recency"
    )
    include_domains: list[str] | None = Field(
        default=None,
        description="Only include results from these domains"
    )
    exclude_domains: list[str] | None = Field(
        default=None,
        description="Exclude results from these domains"
    )
    # When False, the /search endpoint skips the LLM synthesis step entirely.
    include_answer: bool = Field(
        default=True,
        description="Include AI-generated answer"
    )
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
# === Response Models ===
|
| 35 |
+
|
| 36 |
+
class Citation(BaseModel):
    """Citation reference for the answer.

    `index` corresponds to the [n] markers embedded in the synthesized
    answer text.
    """

    index: int = Field(..., description="Citation index (1-based)")
    url: str = Field(..., description="Source URL")
    title: str = Field(..., description="Source title")
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
class TemporalContext(BaseModel):
    """Temporal metadata about the search."""

    query_temporal_intent: Literal["current", "historical", "neutral"] = Field(
        ...,
        description="Detected temporal intent of the query"
    )
    temporal_urgency: float = Field(
        ...,
        ge=0.0,
        le=1.0,
        description="How important freshness is for this query (0-1)"
    )
    # Stored as a plain string (callers pass YYYY-MM-DD) for easy JSON
    # round-tripping rather than a date object.
    current_date: str = Field(..., description="Current date for context")
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
class SearchResult(BaseModel):
    """Individual search result.

    Scores are all in [0, 1]; `score` is the blended relevance produced by
    the reranking pipeline, while freshness/authority are its components.
    """

    title: str = Field(..., description="Result title")
    url: str = Field(..., description="Result URL")
    content: str = Field(..., description="Result content/snippet")
    score: float = Field(..., ge=0.0, le=1.0, description="Overall relevance score")
    published_date: datetime | None = Field(
        default=None,
        description="Publication date if available"
    )
    # 0.5 is the neutral default used when no publication date is known.
    freshness_score: float = Field(
        default=0.5,
        ge=0.0,
        le=1.0,
        description="How fresh/recent the content is"
    )
    # 0.5 is the neutral default for domains not in the authority tables.
    authority_score: float = Field(
        default=0.5,
        ge=0.0,
        le=1.0,
        description="Domain authority/trust score"
    )
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
class SearchResponse(BaseModel):
    """Complete search response.

    `answer` and `citations` are populated only when synthesis was requested
    and at least one result was found.
    """

    query: str = Field(..., description="Original query")
    answer: str | None = Field(
        default=None,
        description="AI-generated answer synthesized from results"
    )
    results: list[SearchResult] = Field(
        default_factory=list,
        description="Ranked search results"
    )
    citations: list[Citation] = Field(
        default_factory=list,
        description="Citations referenced in the answer"
    )
    temporal_context: TemporalContext | None = Field(
        default=None,
        description="Temporal analysis metadata"
    )
    processing_time_ms: float = Field(..., description="Total processing time in milliseconds")
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
class ErrorResponse(BaseModel):
    """Error response model (advertised for 500s in the OpenAPI schema)."""

    error: str = Field(..., description="Error message")
    detail: str | None = Field(default=None, description="Detailed error information")
|
app/config.py
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Application configuration using pydantic-settings."""
|
| 2 |
+
|
| 3 |
+
from functools import lru_cache
|
| 4 |
+
from typing import Literal
|
| 5 |
+
|
| 6 |
+
from pydantic_settings import BaseSettings, SettingsConfigDict
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class Settings(BaseSettings):
    """Application settings loaded from environment variables.

    Values come from the process environment and a local UTF-8 ".env" file;
    unknown variables are ignored (extra="ignore").
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        extra="ignore",
    )

    # API Keys - Search Sources
    tavily_api_key: str = ""  # empty string means Tavily is disabled
    serper_api_key: str | None = None

    # API Keys - LLM Providers
    groq_api_key: str | None = None
    openrouter_api_key: str | None = None

    # LLM Configuration
    llm_provider: Literal["groq", "openrouter"] = "groq"
    llm_model: str = "llama-3.3-70b-versatile"

    # Reranking Models (not yet wired into the MVP pipeline)
    bi_encoder_model: str = "BAAI/bge-small-en-v1.5"
    cross_encoder_model: str = "BAAI/bge-reranker-base"

    # Temporal Settings
    default_freshness_half_life: int = 30  # days

    # API Settings
    max_search_results: int = 20  # fetched per source, before reranking
    max_final_results: int = 10   # returned after reranking

    @property
    def llm_api_key(self) -> str:
        """Get the appropriate API key based on provider.

        Returns an empty string (never None) when unset, so callers can use
        simple truthiness checks.
        """
        if self.llm_provider == "groq":
            return self.groq_api_key or ""
        return self.openrouter_api_key or ""
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
@lru_cache
def get_settings() -> Settings:
    """Get cached settings instance.

    lru_cache makes this a process-wide singleton, so the environment and
    .env file are read only once; use get_settings.cache_clear() in tests
    to force a reload.
    """
    return Settings()
|
app/main.py
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Lancer API - Main FastAPI application."""
|
| 2 |
+
|
| 3 |
+
from contextlib import asynccontextmanager
|
| 4 |
+
from datetime import datetime
|
| 5 |
+
|
| 6 |
+
from fastapi import FastAPI
|
| 7 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 8 |
+
|
| 9 |
+
from app.api.routes import search
|
| 10 |
+
from app.config import get_settings
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan events.

    Runs once per process: logs configuration at startup, yields for the
    app's lifetime, then logs at shutdown.
    """
    # Startup
    settings = get_settings()
    print(f"🚀 Lancer API starting...")
    print(f" LLM Provider: {settings.llm_provider}")
    print(f" LLM Model: {settings.llm_model}")
    yield
    # Shutdown
    print("👋 Lancer API shutting down...")
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
# Application instance; lifespan handles startup/shutdown logging.
app = FastAPI(
    title="Lancer Search API",
    description="Advanced AI-powered search API with temporal intelligence",
    version="0.1.0",
    lifespan=lifespan,
)

# CORS middleware
# NOTE(review): allow_origins=["*"] combined with allow_credentials=True is
# wide open — restrict to known origins before production.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Include routers — all search endpoints are mounted under /api/v1.
app.include_router(search.router, prefix="/api/v1", tags=["search"])
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
@app.get("/health")
|
| 47 |
+
async def health_check():
|
| 48 |
+
"""Health check endpoint."""
|
| 49 |
+
return {
|
| 50 |
+
"status": "healthy",
|
| 51 |
+
"timestamp": datetime.now().isoformat(),
|
| 52 |
+
"version": "0.1.0",
|
| 53 |
+
}
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
@app.get("/")
|
| 57 |
+
async def root():
|
| 58 |
+
"""Root endpoint with API info."""
|
| 59 |
+
return {
|
| 60 |
+
"name": "Lancer Search API",
|
| 61 |
+
"version": "0.1.0",
|
| 62 |
+
"docs": "/docs",
|
| 63 |
+
"health": "/health",
|
| 64 |
+
}
|
app/reranking/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
"""Reranking module."""
|
app/reranking/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (187 Bytes). View file
|
|
|
app/reranking/__pycache__/authority_scorer.cpython-311.pyc
ADDED
|
Binary file (4.48 kB). View file
|
|
|
app/reranking/__pycache__/pipeline.cpython-311.pyc
ADDED
|
Binary file (3.55 kB). View file
|
|
|
app/reranking/authority_scorer.py
ADDED
|
@@ -0,0 +1,134 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Domain authority scoring.
|
| 2 |
+
|
| 3 |
+
Assigns trust/authority scores to domains based on known reliable sources.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
from urllib.parse import urlparse
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
# High authority domains (trusted sources)
|
| 10 |
+
HIGH_AUTHORITY_DOMAINS = {
|
| 11 |
+
# Academic & Research
|
| 12 |
+
".edu": 0.9,
|
| 13 |
+
".gov": 0.9,
|
| 14 |
+
".ac.uk": 0.85,
|
| 15 |
+
|
| 16 |
+
# Major tech companies
|
| 17 |
+
"github.com": 0.8,
|
| 18 |
+
"stackoverflow.com": 0.8,
|
| 19 |
+
"docs.python.org": 0.85,
|
| 20 |
+
"developer.mozilla.org": 0.85,
|
| 21 |
+
"arxiv.org": 0.9,
|
| 22 |
+
|
| 23 |
+
# Major news sources
|
| 24 |
+
"reuters.com": 0.8,
|
| 25 |
+
"bbc.com": 0.75,
|
| 26 |
+
"nytimes.com": 0.75,
|
| 27 |
+
"theguardian.com": 0.75,
|
| 28 |
+
|
| 29 |
+
# Reference
|
| 30 |
+
"wikipedia.org": 0.7,
|
| 31 |
+
"britannica.com": 0.8,
|
| 32 |
+
|
| 33 |
+
# AI/ML specific
|
| 34 |
+
"openai.com": 0.85,
|
| 35 |
+
"anthropic.com": 0.85,
|
| 36 |
+
"huggingface.co": 0.8,
|
| 37 |
+
"deepmind.google": 0.85,
|
| 38 |
+
"ai.meta.com": 0.8,
|
| 39 |
+
|
| 40 |
+
# Tech publications
|
| 41 |
+
"techcrunch.com": 0.7,
|
| 42 |
+
"wired.com": 0.7,
|
| 43 |
+
"arstechnica.com": 0.75,
|
| 44 |
+
"theverge.com": 0.7,
|
| 45 |
+
}
|
| 46 |
+
|
| 47 |
+
# Low authority patterns (less reliable)
|
| 48 |
+
LOW_AUTHORITY_PATTERNS = [
|
| 49 |
+
"medium.com", # User-generated, variable quality
|
| 50 |
+
"reddit.com", # Forum, variable quality
|
| 51 |
+
"quora.com", # Q&A, variable quality
|
| 52 |
+
"blogspot.com",
|
| 53 |
+
"wordpress.com",
|
| 54 |
+
"tumblr.com",
|
| 55 |
+
]
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def calculate_authority_score(url: str) -> float:
    """
    Calculate domain authority score for a URL.

    Args:
        url: The URL to score

    Returns:
        Authority score between 0.0 and 1.0 (0.5 for empty, unknown, or
        unparseable URLs; 0.4 for known low-quality platforms).
    """
    if not url:
        return 0.5

    try:
        domain = urlparse(url).netloc.lower()

        # Strip "www." so www.github.com matches github.com.
        if domain.startswith("www."):
            domain = domain[4:]

        # Known domains: exact match or a true subdomain ("." + domain).
        # Fix: the old endswith(known_domain) check let lookalike hosts such
        # as "evilgithub.com" inherit github.com's score.
        for known_domain, score in HIGH_AUTHORITY_DOMAINS.items():
            if known_domain.startswith("."):
                # TLD-style entries (".edu", ".gov", ".ac.uk") match by suffix.
                if domain.endswith(known_domain):
                    return score
            elif domain == known_domain or domain.endswith("." + known_domain):
                return score

        # Low-authority platforms: same exact-or-subdomain rule.
        # Fix: the old substring check ("medium.com" in domain) also matched
        # unrelated hosts like "notmedium.com.example".
        for pattern in LOW_AUTHORITY_PATTERNS:
            if domain == pattern or domain.endswith("." + pattern):
                return 0.4

        # Default score for unknown domains
        return 0.5

    except Exception:
        # Malformed URLs get the neutral default rather than raising.
        return 0.5
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
def get_domain_category(url: str) -> str:
    """
    Get a category label for the domain.

    Args:
        url: The URL to categorize

    Returns:
        Category string: "Academic", "Government", "Developer", "News",
        "AI/ML", "Reference", "General", or "Unknown" for empty/unparseable
        URLs.
    """
    if not url:
        return "Unknown"

    try:
        domain = urlparse(url).netloc.lower()

        def _is(*sites: str) -> bool:
            """True if the host equals or is a subdomain of any given site.

            Fix: the old substring checks ('".edu" in domain') misclassified
            hosts like "education.example.com" as Academic.
            """
            return any(domain == s or domain.endswith("." + s) for s in sites)

        if domain.endswith((".edu", ".ac.uk")) or _is("arxiv.org"):
            return "Academic"
        elif domain.endswith(".gov"):
            return "Government"
        elif _is("github.com", "stackoverflow.com") or domain.startswith("docs."):
            return "Developer"
        elif _is("reuters.com", "bbc.com", "bbc.co.uk", "nytimes.com",
                 "cnn.com", "theguardian.com"):
            return "News"
        elif _is("openai.com", "anthropic.com", "huggingface.co", "deepmind.google"):
            return "AI/ML"
        elif _is("wikipedia.org"):
            return "Reference"
        else:
            return "General"

    except Exception:
        return "Unknown"
|
app/reranking/pipeline.py
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Multi-stage reranking pipeline.
|
| 2 |
+
|
| 3 |
+
Implements a 3-stage reranking approach:
|
| 4 |
+
1. Bi-Encoder: Fast semantic similarity (optional, for large result sets)
|
| 5 |
+
2. Cross-Encoder: Accurate relevance scoring
|
| 6 |
+
3. Temporal + Authority: Freshness and domain trust weighting
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
from typing import Optional
|
| 10 |
+
|
| 11 |
+
from app.temporal.freshness_scorer import calculate_freshness_score, adjust_score_by_freshness
|
| 12 |
+
from app.reranking.authority_scorer import calculate_authority_score
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
async def rerank_results(
    query: str,
    results: list[dict],
    temporal_urgency: float = 0.5,
    max_results: int = 10,
) -> list[dict]:
    """
    Apply multi-stage reranking to search results.

    MVP pipeline: freshness and authority scores are computed per result and
    blended into the source-provided relevance score. The embedding-based
    stages (bi-encoder / cross-encoder) are intentionally skipped for now.

    Args:
        query: Original search query
        results: Raw search results (each dict is annotated in place with
            freshness_score, authority_score, and an updated score)
        temporal_urgency: How important freshness is (0-1)
        max_results: Maximum results to return

    Returns:
        A new list containing the top results by blended score (descending).
    """
    if not results:
        return []

    # Stage 1 (bi-encoder) and Stage 2 (cross-encoder) are skipped in the
    # MVP; see rerank_with_embeddings for the future full pipeline.

    # Stage 3: temporal + authority scoring, written onto each result dict.
    for entry in results:
        entry["freshness_score"] = calculate_freshness_score(entry.get("published_date"))
        entry["authority_score"] = calculate_authority_score(entry.get("url", ""))

        # Freshness-adjust the source relevance, weighted by temporal urgency.
        relevance = adjust_score_by_freshness(
            base_score=entry.get("score", 0.5),
            freshness_score=entry["freshness_score"],
            temporal_urgency=temporal_urgency,
        )

        # Blend in domain authority at a fixed 10% weight.
        entry["score"] = (relevance * 0.9) + (entry["authority_score"] * 0.1)

    # Highest blended score first; the input list order is left untouched.
    ranked = sorted(results, key=lambda item: item["score"], reverse=True)
    return ranked[:max_results]
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
async def rerank_with_embeddings(
    query: str,
    results: list[dict],
    max_results: int = 10,
) -> list[dict]:
    """
    Full reranking with embedding models.

    TODO: Implement when adding sentence-transformers support:
    1. Use bi-encoder for fast filtering
    2. Use cross-encoder for precise scoring

    Placeholder behavior: order by the source-provided score (missing
    scores count as 0) and return the top max_results entries.
    """
    by_score = sorted(
        results,
        key=lambda item: item.get("score", 0),
        reverse=True,
    )
    return by_score[:max_results]
|
app/sources/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
"""Search sources module."""
|
app/sources/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (190 Bytes). View file
|
|
|
app/sources/__pycache__/duckduckgo.cpython-311.pyc
ADDED
|
Binary file (3.69 kB). View file
|
|
|
app/sources/__pycache__/tavily.cpython-311.pyc
ADDED
|
Binary file (4.15 kB). View file
|
|
|
app/sources/duckduckgo.py
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""DuckDuckGo search source (free fallback).
|
| 2 |
+
|
| 3 |
+
Uses the duckduckgo_search library for free web search.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
from datetime import datetime, timedelta
|
| 7 |
+
from typing import Optional
|
| 8 |
+
|
| 9 |
+
import httpx
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
async def search_duckduckgo(
    query: str,
    max_results: int = 10,
    region: str = "wt-wt",  # Worldwide
) -> list[dict]:
    """
    Search using DuckDuckGo (free, no API key required).

    This is a fallback when other sources are unavailable.
    Uses the HTML endpoint for basic search.

    Args:
        query: Search query
        max_results: Maximum results to return
        region: Region code (DDG "kl" parameter; "wt-wt" = worldwide)

    Returns:
        List of result dicts with title, url, content; an empty list on any
        network or parse failure (errors are printed, never raised).
    """
    try:
        # Use DuckDuckGo HTML API (lightweight, no JS needed)
        params = {
            "q": query,
            "kl": region,
            "kp": "-1",  # Safe search off
        }

        # Browser-like User-Agent; DDG may reject default HTTP-client UAs.
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
        }

        async with httpx.AsyncClient(timeout=15.0) as client:
            # Use DuckDuckGo Lite (simpler to parse)
            response = await client.get(
                "https://lite.duckduckgo.com/lite/",
                params=params,
                headers=headers,
                follow_redirects=True,
            )
            response.raise_for_status()
            html = response.text

        # Simple HTML parsing for results
        results = parse_ddg_lite_results(html, max_results)
        return results

    except Exception as e:
        # Deliberate best-effort: swallow and log so the caller can move on.
        print(f"DuckDuckGo search error: {e}")
        return []
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
def parse_ddg_lite_results(html: str, max_results: int) -> list[dict]:
    """
    Parse DuckDuckGo Lite HTML results.

    Simple regex-based parser for the lite version of DDG; no external
    HTML parser is required.

    Args:
        html: Raw HTML of a DuckDuckGo Lite results page. (This parameter
            shadows the stdlib ``html`` module, hence the by-name import
            of ``unescape`` below.)
        max_results: Maximum number of results to return.

    Returns:
        List of result dicts with title, url, content, published_date,
        score and source.
    """
    import re
    from html import unescape  # `html` parameter shadows the stdlib module

    # Result anchors look like:
    #   <a rel="nofollow" href="URL" class='result-link'>TITLE</a>
    # Attribute order is not guaranteed (href may precede class), so capture
    # the whole attribute string and extract `href` from it separately
    # instead of requiring a fixed class-then-href ordering.
    anchor_pattern = r'<a\b([^>]*)>([^<]+)</a>'
    href_pattern = r'href=["\']([^"\']+)["\']'

    # Snippets: <td class="result-snippet">TEXT</td>
    snippet_pattern = r'<td[^>]*class=["\']result-snippet["\'][^>]*>([^<]+)</td>'

    links = []
    for attrs, title in re.findall(anchor_pattern, html, re.IGNORECASE):
        if "result-link" not in attrs:
            continue
        href_match = re.search(href_pattern, attrs, re.IGNORECASE)
        if not href_match:
            continue
        links.append((href_match.group(1), title))

    snippets = re.findall(snippet_pattern, html, re.IGNORECASE)

    results = []
    for i, (url, title) in enumerate(links[:max_results]):
        # Snippets are paired with links positionally.
        content = snippets[i] if i < len(snippets) else ""

        # Clean up whitespace and decode HTML entities (&amp; -> &, etc.).
        title = unescape(title.strip())
        content = unescape(content.strip())

        # Skip DuckDuckGo internal links
        if "duckduckgo.com" in url:
            continue

        results.append({
            "title": title,
            "url": url,
            "content": content,
            "published_date": None,  # DDG Lite doesn't provide dates
            "score": 0.5,  # Neutral score, will be reranked
            "source": "duckduckgo",
        })

    return results[:max_results]
|
app/sources/tavily.py
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Tavily search source integration.
|
| 2 |
+
|
| 3 |
+
Tavily provides high-quality, AI-optimized search results.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
from datetime import datetime
|
| 7 |
+
from typing import Literal, Optional
|
| 8 |
+
|
| 9 |
+
import httpx
|
| 10 |
+
|
| 11 |
+
from app.config import get_settings
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
async def search_tavily(
    query: str,
    max_results: int = 10,
    freshness: Literal["day", "week", "month", "year", "any"] = "any",
    include_domains: Optional[list[str]] = None,
    exclude_domains: Optional[list[str]] = None,
    search_depth: Literal["basic", "advanced"] = "advanced",
) -> list[dict]:
    """
    Search using the Tavily API.

    Args:
        query: Search query
        max_results: Maximum results to return
        freshness: Filter by recency ("any" disables the filter)
        include_domains: Only include these domains
        exclude_domains: Exclude these domains
        search_depth: "basic" (fast) or "advanced" (thorough)

    Returns:
        List of result dicts with title, url, content, published_date,
        score and source. Empty list when no API key is configured or
        on any request failure.
    """
    settings = get_settings()

    # Without a key this source is simply unavailable.
    if not settings.tavily_api_key:
        return []

    # Tavily expresses recency as a number of days; "any" means no filter.
    freshness_days = {"day": 1, "week": 7, "month": 30, "year": 365, "any": None}
    days = freshness_days.get(freshness)

    payload = {
        "api_key": settings.tavily_api_key,
        "query": query,
        "search_depth": search_depth,
        "max_results": max_results,
        "include_answer": False,
        "include_raw_content": False,
    }
    if days:
        payload["days"] = days
    if include_domains:
        payload["include_domains"] = include_domains
    if exclude_domains:
        payload["exclude_domains"] = exclude_domains

    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.post(
                "https://api.tavily.com/search",
                json=payload,
            )
            response.raise_for_status()
            data = response.json()

        normalized = []
        for item in data.get("results", []):
            # Parse the published date when present; malformed values fall
            # back to None rather than failing the whole search.
            raw_date = item.get("published_date")
            parsed_date = None
            if raw_date:
                try:
                    parsed_date = datetime.fromisoformat(
                        raw_date.replace("Z", "+00:00")
                    )
                except (ValueError, TypeError):
                    parsed_date = None

            normalized.append({
                "title": item.get("title", ""),
                "url": item.get("url", ""),
                "content": item.get("content", ""),
                "published_date": parsed_date,
                "score": item.get("score", 0.5),
                "source": "tavily",
            })

        return normalized

    except httpx.HTTPError as e:
        print(f"Tavily search error: {e}")
        return []
    except Exception as e:
        print(f"Tavily unexpected error: {e}")
        return []
|
app/temporal/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
"""Temporal intelligence module."""
|
app/temporal/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (198 Bytes). View file
|
|
|
app/temporal/__pycache__/freshness_scorer.cpython-311.pyc
ADDED
|
Binary file (3.81 kB). View file
|
|
|
app/temporal/__pycache__/intent_detector.cpython-311.pyc
ADDED
|
Binary file (3.01 kB). View file
|
|
|