Madras1 commited on
Commit
dc2d570
·
verified ·
1 Parent(s): 3b5c441

Upload 53 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .env.example +11 -0
  2. .gitattributes +5 -0
  3. .gitignore +45 -0
  4. .venv/.gitignore +1 -0
  5. .venv/bin/Activate.ps1 +247 -0
  6. .venv/bin/activate +63 -0
  7. .venv/bin/python.exe +3 -0
  8. .venv/bin/python3.11.exe +3 -0
  9. .venv/bin/python3.exe +3 -0
  10. .venv/bin/python3w.exe +3 -0
  11. .venv/bin/pythonw.exe +3 -0
  12. .venv/pyvenv.cfg +5 -0
  13. Dockerfile +21 -0
  14. README.md +54 -11
  15. app/__init__.py +3 -0
  16. app/__pycache__/__init__.cpython-311.pyc +0 -0
  17. app/__pycache__/config.cpython-311.pyc +0 -0
  18. app/__pycache__/main.cpython-311.pyc +0 -0
  19. app/agents/__init__.py +1 -0
  20. app/agents/__pycache__/__init__.cpython-311.pyc +0 -0
  21. app/agents/__pycache__/llm_client.cpython-311.pyc +0 -0
  22. app/agents/__pycache__/synthesizer.cpython-311.pyc +0 -0
  23. app/agents/llm_client.py +105 -0
  24. app/agents/synthesizer.py +127 -0
  25. app/api/__init__.py +1 -0
  26. app/api/__pycache__/__init__.cpython-311.pyc +0 -0
  27. app/api/__pycache__/schemas.cpython-311.pyc +0 -0
  28. app/api/routes/__init__.py +1 -0
  29. app/api/routes/__pycache__/__init__.cpython-311.pyc +0 -0
  30. app/api/routes/__pycache__/search.cpython-311.pyc +0 -0
  31. app/api/routes/search.py +146 -0
  32. app/api/schemas.py +112 -0
  33. app/config.py +52 -0
  34. app/main.py +64 -0
  35. app/reranking/__init__.py +1 -0
  36. app/reranking/__pycache__/__init__.cpython-311.pyc +0 -0
  37. app/reranking/__pycache__/authority_scorer.cpython-311.pyc +0 -0
  38. app/reranking/__pycache__/pipeline.cpython-311.pyc +0 -0
  39. app/reranking/authority_scorer.py +134 -0
  40. app/reranking/pipeline.py +99 -0
  41. app/sources/__init__.py +1 -0
  42. app/sources/__pycache__/__init__.cpython-311.pyc +0 -0
  43. app/sources/__pycache__/duckduckgo.cpython-311.pyc +0 -0
  44. app/sources/__pycache__/tavily.cpython-311.pyc +0 -0
  45. app/sources/duckduckgo.py +103 -0
  46. app/sources/tavily.py +106 -0
  47. app/temporal/__init__.py +1 -0
  48. app/temporal/__pycache__/__init__.cpython-311.pyc +0 -0
  49. app/temporal/__pycache__/freshness_scorer.cpython-311.pyc +0 -0
  50. app/temporal/__pycache__/intent_detector.cpython-311.pyc +0 -0
.env.example ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # LLM Providers (choose one or both)
2
+ GROQ_API_KEY=gsk_your_groq_key
3
+ OPENROUTER_API_KEY=sk-or-your_openrouter_key
4
+
5
+ # Search Sources
6
+ TAVILY_API_KEY=tvly-your_tavily_key
7
+ SERPER_API_KEY=your_serper_key # Optional
8
+
9
+ # Configuration
10
+ LLM_PROVIDER=groq # or "openrouter"
11
+ LLM_MODEL=llama-3.3-70b-versatile
.gitattributes CHANGED
@@ -33,3 +33,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ .venv/bin/python.exe filter=lfs diff=lfs merge=lfs -text
37
+ .venv/bin/python3.11.exe filter=lfs diff=lfs merge=lfs -text
38
+ .venv/bin/python3.exe filter=lfs diff=lfs merge=lfs -text
39
+ .venv/bin/python3w.exe filter=lfs diff=lfs merge=lfs -text
40
+ .venv/bin/pythonw.exe filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # HuggingFace Spaces files
2
+ *.hf
3
+ .hf
4
+
5
+ # Python
6
+ __pycache__/
7
+ *.py[cod]
8
+ *$py.class
9
+ *.so
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ *.egg-info/
24
+ .installed.cfg
25
+ *.egg
26
+
27
+ # Virtual environments
28
+ .env
29
+ .venv/
30
+ venv/
31
+ ENV/
32
+
33
+ # IDE
34
+ .idea/
35
+ .vscode/
36
+ *.swp
37
+ *.swo
38
+
39
+ # OS
40
+ .DS_Store
41
+ Thumbs.db
42
+
43
+ # Local development
44
+ *.log
45
+ .cache/
.venv/.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ *
.venv/bin/Activate.ps1 ADDED
@@ -0,0 +1,247 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <#
2
+ .Synopsis
3
+ Activate a Python virtual environment for the current PowerShell session.
4
+
5
+ .Description
6
+ Pushes the python executable for a virtual environment to the front of the
7
+ $Env:PATH environment variable and sets the prompt to signify that you are
8
+ in a Python virtual environment. Makes use of the command line switches as
9
+ well as the `pyvenv.cfg` file values present in the virtual environment.
10
+
11
+ .Parameter VenvDir
12
+ Path to the directory that contains the virtual environment to activate. The
13
+ default value for this is the parent of the directory that the Activate.ps1
14
+ script is located within.
15
+
16
+ .Parameter Prompt
17
+ The prompt prefix to display when this virtual environment is activated. By
18
+ default, this prompt is the name of the virtual environment folder (VenvDir)
19
+ surrounded by parentheses and followed by a single space (ie. '(.venv) ').
20
+
21
+ .Example
22
+ Activate.ps1
23
+ Activates the Python virtual environment that contains the Activate.ps1 script.
24
+
25
+ .Example
26
+ Activate.ps1 -Verbose
27
+ Activates the Python virtual environment that contains the Activate.ps1 script,
28
+ and shows extra information about the activation as it executes.
29
+
30
+ .Example
31
+ Activate.ps1 -VenvDir C:\Users\MyUser\Common\.venv
32
+ Activates the Python virtual environment located in the specified location.
33
+
34
+ .Example
35
+ Activate.ps1 -Prompt "MyPython"
36
+ Activates the Python virtual environment that contains the Activate.ps1 script,
37
+ and prefixes the current prompt with the specified string (surrounded in
38
+ parentheses) while the virtual environment is active.
39
+
40
+ .Notes
41
+ On Windows, it may be required to enable this Activate.ps1 script by setting the
42
+ execution policy for the user. You can do this by issuing the following PowerShell
43
+ command:
44
+
45
+ PS C:\> Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope CurrentUser
46
+
47
+ For more information on Execution Policies:
48
+ https://go.microsoft.com/fwlink/?LinkID=135170
49
+
50
+ #>
51
+ Param(
52
+ [Parameter(Mandatory = $false)]
53
+ [String]
54
+ $VenvDir,
55
+ [Parameter(Mandatory = $false)]
56
+ [String]
57
+ $Prompt
58
+ )
59
+
60
+ <# Function declarations --------------------------------------------------- #>
61
+
62
+ <#
63
+ .Synopsis
64
+ Remove all shell session elements added by the Activate script, including the
65
+ addition of the virtual environment's Python executable from the beginning of
66
+ the PATH variable.
67
+
68
+ .Parameter NonDestructive
69
+ If present, do not remove this function from the global namespace for the
70
+ session.
71
+
72
+ #>
73
+ function global:deactivate ([switch]$NonDestructive) {
74
+ # Revert to original values
75
+
76
+ # The prior prompt:
77
+ if (Test-Path -Path Function:_OLD_VIRTUAL_PROMPT) {
78
+ Copy-Item -Path Function:_OLD_VIRTUAL_PROMPT -Destination Function:prompt
79
+ Remove-Item -Path Function:_OLD_VIRTUAL_PROMPT
80
+ }
81
+
82
+ # The prior PYTHONHOME:
83
+ if (Test-Path -Path Env:_OLD_VIRTUAL_PYTHONHOME) {
84
+ Copy-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME -Destination Env:PYTHONHOME
85
+ Remove-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME
86
+ }
87
+
88
+ # The prior PATH:
89
+ if (Test-Path -Path Env:_OLD_VIRTUAL_PATH) {
90
+ Copy-Item -Path Env:_OLD_VIRTUAL_PATH -Destination Env:PATH
91
+ Remove-Item -Path Env:_OLD_VIRTUAL_PATH
92
+ }
93
+
94
+ # Just remove the VIRTUAL_ENV altogether:
95
+ if (Test-Path -Path Env:VIRTUAL_ENV) {
96
+ Remove-Item -Path env:VIRTUAL_ENV
97
+ }
98
+
99
+ # Just remove VIRTUAL_ENV_PROMPT altogether.
100
+ if (Test-Path -Path Env:VIRTUAL_ENV_PROMPT) {
101
+ Remove-Item -Path env:VIRTUAL_ENV_PROMPT
102
+ }
103
+
104
+ # Just remove the _PYTHON_VENV_PROMPT_PREFIX altogether:
105
+ if (Get-Variable -Name "_PYTHON_VENV_PROMPT_PREFIX" -ErrorAction SilentlyContinue) {
106
+ Remove-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Scope Global -Force
107
+ }
108
+
109
+ # Leave deactivate function in the global namespace if requested:
110
+ if (-not $NonDestructive) {
111
+ Remove-Item -Path function:deactivate
112
+ }
113
+ }
114
+
115
+ <#
116
+ .Description
117
+ Get-PyVenvConfig parses the values from the pyvenv.cfg file located in the
118
+ given folder, and returns them in a map.
119
+
120
+ For each line in the pyvenv.cfg file, if that line can be parsed into exactly
121
+ two strings separated by `=` (with any amount of whitespace surrounding the =)
122
+ then it is considered a `key = value` line. The left hand string is the key,
123
+ the right hand is the value.
124
+
125
+ If the value starts with a `'` or a `"` then the first and last character is
126
+ stripped from the value before being captured.
127
+
128
+ .Parameter ConfigDir
129
+ Path to the directory that contains the `pyvenv.cfg` file.
130
+ #>
131
+ function Get-PyVenvConfig(
132
+ [String]
133
+ $ConfigDir
134
+ ) {
135
+ Write-Verbose "Given ConfigDir=$ConfigDir, obtain values in pyvenv.cfg"
136
+
137
+ # Ensure the file exists, and issue a warning if it doesn't (but still allow the function to continue).
138
+ $pyvenvConfigPath = Join-Path -Resolve -Path $ConfigDir -ChildPath 'pyvenv.cfg' -ErrorAction Continue
139
+
140
+ # An empty map will be returned if no config file is found.
141
+ $pyvenvConfig = @{ }
142
+
143
+ if ($pyvenvConfigPath) {
144
+
145
+ Write-Verbose "File exists, parse `key = value` lines"
146
+ $pyvenvConfigContent = Get-Content -Path $pyvenvConfigPath
147
+
148
+ $pyvenvConfigContent | ForEach-Object {
149
+ $keyval = $PSItem -split "\s*=\s*", 2
150
+ if ($keyval[0] -and $keyval[1]) {
151
+ $val = $keyval[1]
152
+
153
+ # Remove extraneous quotations around a string value.
154
+ if ("'""".Contains($val.Substring(0, 1))) {
155
+ $val = $val.Substring(1, $val.Length - 2)
156
+ }
157
+
158
+ $pyvenvConfig[$keyval[0]] = $val
159
+ Write-Verbose "Adding Key: '$($keyval[0])'='$val'"
160
+ }
161
+ }
162
+ }
163
+ return $pyvenvConfig
164
+ }
165
+
166
+
167
+ <# Begin Activate script --------------------------------------------------- #>
168
+
169
+ # Determine the containing directory of this script
170
+ $VenvExecPath = Split-Path -Parent $MyInvocation.MyCommand.Definition
171
+ $VenvExecDir = Get-Item -Path $VenvExecPath
172
+
173
+ Write-Verbose "Activation script is located in path: '$VenvExecPath'"
174
+ Write-Verbose "VenvExecDir Fullname: '$($VenvExecDir.FullName)"
175
+ Write-Verbose "VenvExecDir Name: '$($VenvExecDir.Name)"
176
+
177
+ # Set values required in priority: CmdLine, ConfigFile, Default
178
+ # First, get the location of the virtual environment, it might not be
179
+ # VenvExecDir if specified on the command line.
180
+ if ($VenvDir) {
181
+ Write-Verbose "VenvDir given as parameter, using '$VenvDir' to determine values"
182
+ }
183
+ else {
184
+ Write-Verbose "VenvDir not given as a parameter, using parent directory name as VenvDir."
185
+ $VenvDir = $VenvExecDir.Parent.FullName.TrimEnd("\\/")
186
+ Write-Verbose "VenvDir=$VenvDir"
187
+ }
188
+
189
+ # Next, read the `pyvenv.cfg` file to determine any required value such
190
+ # as `prompt`.
191
+ $pyvenvCfg = Get-PyVenvConfig -ConfigDir $VenvDir
192
+
193
+ # Next, set the prompt from the command line, or the config file, or
194
+ # just use the name of the virtual environment folder.
195
+ if ($Prompt) {
196
+ Write-Verbose "Prompt specified as argument, using '$Prompt'"
197
+ }
198
+ else {
199
+ Write-Verbose "Prompt not specified as argument to script, checking pyvenv.cfg value"
200
+ if ($pyvenvCfg -and $pyvenvCfg['prompt']) {
201
+ Write-Verbose " Setting based on value in pyvenv.cfg='$($pyvenvCfg['prompt'])'"
202
+ $Prompt = $pyvenvCfg['prompt'];
203
+ }
204
+ else {
205
+ Write-Verbose " Setting prompt based on parent's directory's name. (Is the directory name passed to venv module when creating the virtual environment)"
206
+ Write-Verbose " Got leaf-name of $VenvDir='$(Split-Path -Path $venvDir -Leaf)'"
207
+ $Prompt = Split-Path -Path $venvDir -Leaf
208
+ }
209
+ }
210
+
211
+ Write-Verbose "Prompt = '$Prompt'"
212
+ Write-Verbose "VenvDir='$VenvDir'"
213
+
214
+ # Deactivate any currently active virtual environment, but leave the
215
+ # deactivate function in place.
216
+ deactivate -nondestructive
217
+
218
+ # Now set the environment variable VIRTUAL_ENV, used by many tools to determine
219
+ # that there is an activated venv.
220
+ $env:VIRTUAL_ENV = $VenvDir
221
+
222
+ if (-not $Env:VIRTUAL_ENV_DISABLE_PROMPT) {
223
+
224
+ Write-Verbose "Setting prompt to '$Prompt'"
225
+
226
+ # Set the prompt to include the env name
227
+ # Make sure _OLD_VIRTUAL_PROMPT is global
228
+ function global:_OLD_VIRTUAL_PROMPT { "" }
229
+ Copy-Item -Path function:prompt -Destination function:_OLD_VIRTUAL_PROMPT
230
+ New-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Description "Python virtual environment prompt prefix" -Scope Global -Option ReadOnly -Visibility Public -Value $Prompt
231
+
232
+ function global:prompt {
233
+ Write-Host -NoNewline -ForegroundColor Green "($_PYTHON_VENV_PROMPT_PREFIX) "
234
+ _OLD_VIRTUAL_PROMPT
235
+ }
236
+ $env:VIRTUAL_ENV_PROMPT = $Prompt
237
+ }
238
+
239
+ # Clear PYTHONHOME
240
+ if (Test-Path -Path Env:PYTHONHOME) {
241
+ Copy-Item -Path Env:PYTHONHOME -Destination Env:_OLD_VIRTUAL_PYTHONHOME
242
+ Remove-Item -Path Env:PYTHONHOME
243
+ }
244
+
245
+ # Add the venv to the PATH
246
+ Copy-Item -Path Env:PATH -Destination Env:_OLD_VIRTUAL_PATH
247
+ $Env:PATH = "$VenvExecDir$([System.IO.Path]::PathSeparator)$Env:PATH"
.venv/bin/activate ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This file must be used with "source bin/activate" *from bash*
2
+ # you cannot run it directly
3
+
4
+ deactivate () {
5
+ # reset old environment variables
6
+ if [ -n "${_OLD_VIRTUAL_PATH:-}" ] ; then
7
+ PATH="${_OLD_VIRTUAL_PATH:-}"
8
+ export PATH
9
+ unset _OLD_VIRTUAL_PATH
10
+ fi
11
+ if [ -n "${_OLD_VIRTUAL_PYTHONHOME:-}" ] ; then
12
+ PYTHONHOME="${_OLD_VIRTUAL_PYTHONHOME:-}"
13
+ export PYTHONHOME
14
+ unset _OLD_VIRTUAL_PYTHONHOME
15
+ fi
16
+
17
+ # Call hash to forget past commands. Without forgetting
18
+ # past commands the $PATH changes we made may not be respected
19
+ hash -r 2> /dev/null
20
+
21
+ if [ -n "${_OLD_VIRTUAL_PS1:-}" ] ; then
22
+ PS1="${_OLD_VIRTUAL_PS1:-}"
23
+ export PS1
24
+ unset _OLD_VIRTUAL_PS1
25
+ fi
26
+
27
+ unset VIRTUAL_ENV
28
+ unset VIRTUAL_ENV_PROMPT
29
+ if [ ! "${1:-}" = "nondestructive" ] ; then
30
+ # Self destruct!
31
+ unset -f deactivate
32
+ fi
33
+ }
34
+
35
+ # unset irrelevant variables
36
+ deactivate nondestructive
37
+
38
+ VIRTUAL_ENV=$(cygpath "C:\Users\gabri\Lancer\.venv")
39
+ export VIRTUAL_ENV
40
+
41
+ _OLD_VIRTUAL_PATH="$PATH"
42
+ PATH="$VIRTUAL_ENV/bin:$PATH"
43
+ export PATH
44
+
45
+ # unset PYTHONHOME if set
46
+ # this will fail if PYTHONHOME is set to the empty string (which is bad anyway)
47
+ # could use `if (set -u; : $PYTHONHOME) ;` in bash
48
+ if [ -n "${PYTHONHOME:-}" ] ; then
49
+ _OLD_VIRTUAL_PYTHONHOME="${PYTHONHOME:-}"
50
+ unset PYTHONHOME
51
+ fi
52
+
53
+ if [ -z "${VIRTUAL_ENV_DISABLE_PROMPT:-}" ] ; then
54
+ _OLD_VIRTUAL_PS1="${PS1:-}"
55
+ PS1="(.venv) ${PS1:-}"
56
+ export PS1
57
+ VIRTUAL_ENV_PROMPT="(.venv) "
58
+ export VIRTUAL_ENV_PROMPT
59
+ fi
60
+
61
+ # Call hash to forget past commands. Without forgetting
62
+ # past commands the $PATH changes we made may not be respected
63
+ hash -r 2> /dev/null
.venv/bin/python.exe ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9af09b8342333dd7ac86931f8542366d4cd8e733993e8442d7abe025dcffbfce
3
+ size 138549
.venv/bin/python3.11.exe ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9af09b8342333dd7ac86931f8542366d4cd8e733993e8442d7abe025dcffbfce
3
+ size 138549
.venv/bin/python3.exe ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9af09b8342333dd7ac86931f8542366d4cd8e733993e8442d7abe025dcffbfce
3
+ size 138549
.venv/bin/python3w.exe ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:852435742fd9f70e20c4f9f9c0472f79247543bd88d72f16a70410e0a8a7b1d7
3
+ size 112963
.venv/bin/pythonw.exe ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:852435742fd9f70e20c4f9f9c0472f79247543bd88d72f16a70410e0a8a7b1d7
3
+ size 112963
.venv/pyvenv.cfg ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ home = C:\Program Files\Inkscape\bin
2
+ include-system-site-packages = false
3
+ version = 3.11.10
4
+ executable = C:\Program Files\Inkscape\bin\python.exe
5
+ command = C:\Program Files\Inkscape\bin\python.exe -m venv --without-pip C:\Users\gabri\Lancer\.venv
Dockerfile ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.11-slim

WORKDIR /app

# Install system dependencies for torch
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    && rm -rf /var/lib/apt/lists/*

# Copy project metadata first (best layer-cache hit rate for dependencies)
COPY pyproject.toml .

# Copy the application source BEFORE installing: the original ran
# `pip install -e .` with only pyproject.toml present, so the editable
# install was built against a tree with no package source in it.
COPY app/ ./app/

RUN pip install --no-cache-dir -e .

# HuggingFace Spaces uses port 7860
EXPOSE 7860

# Run with uvicorn
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,11 +1,54 @@
1
- ---
2
- title: Lancer
3
- emoji: 👀
4
- colorFrom: purple
5
- colorTo: pink
6
- sdk: docker
7
- pinned: false
8
- license: mit
9
- ---
10
-
11
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Lancer Search API
3
+ emoji: 🔍
4
+ colorFrom: purple
5
+ colorTo: blue
6
+ sdk: docker
7
+ pinned: false
8
+ ---
9
+
10
+ # Lancer Search API
11
+
12
+ 🔍 Advanced AI-powered search API with temporal intelligence.
13
+
14
+ ## Features
15
+
16
+ - **Temporal Intelligence**: Understands when you need fresh vs historical info
17
+ - **Multi-Stage Reranking**: Freshness + Authority scoring
18
+ - **Multi-Source Search**: Tavily, DuckDuckGo
19
+ - **LLM Synthesis**: Groq or OpenRouter
20
+
21
+ ## API Endpoints
22
+
23
+ ```bash
24
+ # Search with synthesis
25
+ POST /api/v1/search
26
+ {
27
+ "query": "What is the latest GPT model?",
28
+ "max_results": 10,
29
+ "freshness": "week"
30
+ }
31
+
32
+ # Health check
33
+ GET /health
34
+ ```
35
+
36
+ ## Environment Variables
37
+
38
+ Configure these in HuggingFace Space Secrets:
39
+
40
+ | Variable | Required | Description |
41
+ |----------|----------|-------------|
42
+ | `GROQ_API_KEY` | Yes* | Groq API key |
43
+ | `OPENROUTER_API_KEY` | Yes* | OpenRouter API key |
44
+ | `TAVILY_API_KEY` | Yes | Tavily search API key |
45
+ | `LLM_PROVIDER` | No | "groq" or "openrouter" |
46
+
47
+ *At least one LLM provider key required
48
+
49
+ ## Local Development
50
+
51
+ ```bash
52
+ pip install -e .
53
+ uvicorn app.main:app --reload
54
+ ```
app/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ """Lancer - Advanced AI Search API"""
2
+
3
+ __version__ = "0.1.0"
app/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (218 Bytes). View file
 
app/__pycache__/config.cpython-311.pyc ADDED
Binary file (2.28 kB). View file
 
app/__pycache__/main.cpython-311.pyc ADDED
Binary file (2.5 kB). View file
 
app/agents/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Agents module."""
app/agents/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (181 Bytes). View file
 
app/agents/__pycache__/llm_client.cpython-311.pyc ADDED
Binary file (4.42 kB). View file
 
app/agents/__pycache__/synthesizer.cpython-311.pyc ADDED
Binary file (4.98 kB). View file
 
app/agents/llm_client.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """LLM client abstraction for multiple providers.
2
+
3
+ Supports Groq and OpenRouter for LLM inference.
4
+ """
5
+
6
+ import httpx
7
+ from typing import Optional
8
+
9
+ from app.config import get_settings
10
+
11
+
12
async def generate_completion(
    messages: list[dict],
    model: Optional[str] = None,
    temperature: float = 0.3,
    max_tokens: int = 2048,
) -> str:
    """
    Generate a completion using the configured LLM provider.

    Looks up the provider configured in settings and dispatches to the
    matching provider-specific caller.

    Args:
        messages: List of message dicts with 'role' and 'content'
        model: Model override (uses settings default if None)
        temperature: Sampling temperature
        max_tokens: Maximum tokens to generate

    Returns:
        Generated text content

    Raises:
        ValueError: if the configured provider name is not recognised.
    """
    settings = get_settings()
    provider = settings.llm_provider
    chosen_model = model or settings.llm_model

    # Dispatch table rather than an if/elif ladder; both callers share the
    # same positional signature (messages, model, temperature, max_tokens).
    handlers = {
        "groq": _call_groq,
        "openrouter": _call_openrouter,
    }
    handler = handlers.get(provider)
    if handler is None:
        raise ValueError(f"Unknown LLM provider: {provider}")

    return await handler(messages, chosen_model, temperature, max_tokens)
40
+
41
+
42
async def _post_chat_completion(
    url: str,
    headers: dict,
    model: str,
    messages: list[dict],
    temperature: float,
    max_tokens: int,
) -> str:
    """POST an OpenAI-compatible chat-completion request and return the reply text.

    Shared transport for all providers: Groq and OpenRouter both expose the
    OpenAI chat-completions wire format, so only the URL and headers differ.

    Raises:
        httpx.HTTPStatusError: on any non-2xx response (via raise_for_status).
    """
    async with httpx.AsyncClient(timeout=60.0) as client:
        response = await client.post(
            url,
            headers=headers,
            json={
                "model": model,
                "messages": messages,
                "temperature": temperature,
                "max_tokens": max_tokens,
            },
        )
        response.raise_for_status()
        data = response.json()

    return data["choices"][0]["message"]["content"]


async def _call_groq(
    messages: list[dict],
    model: str,
    temperature: float,
    max_tokens: int,
) -> str:
    """Call Groq API.

    Raises:
        ValueError: if GROQ_API_KEY is not configured.
    """
    settings = get_settings()

    if not settings.groq_api_key:
        raise ValueError("GROQ_API_KEY not configured")

    return await _post_chat_completion(
        "https://api.groq.com/openai/v1/chat/completions",
        {
            "Authorization": f"Bearer {settings.groq_api_key}",
            "Content-Type": "application/json",
        },
        model,
        messages,
        temperature,
        max_tokens,
    )


async def _call_openrouter(
    messages: list[dict],
    model: str,
    temperature: float,
    max_tokens: int,
) -> str:
    """Call OpenRouter API.

    Raises:
        ValueError: if OPENROUTER_API_KEY is not configured.
    """
    settings = get_settings()

    if not settings.openrouter_api_key:
        raise ValueError("OPENROUTER_API_KEY not configured")

    return await _post_chat_completion(
        "https://openrouter.ai/api/v1/chat/completions",
        {
            "Authorization": f"Bearer {settings.openrouter_api_key}",
            "Content-Type": "application/json",
            # OpenRouter-specific attribution headers.
            "HTTP-Referer": "https://lancer-api.hf.space",
            "X-Title": "Lancer Search API",
        },
        model,
        messages,
        temperature,
        max_tokens,
    )
app/agents/synthesizer.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Answer synthesizer agent.
2
+
3
+ Generates a coherent answer from search results with citations.
4
+ """
5
+
6
+ from datetime import datetime
7
+ from typing import Optional
8
+
9
+ from app.api.schemas import SearchResult, TemporalContext, Citation
10
+ from app.agents.llm_client import generate_completion
11
+
12
+
13
+ SYNTHESIS_PROMPT = """You are a research assistant that synthesizes information from search results.
14
+
15
+ CURRENT DATE: {current_date}
16
+
17
+ USER QUERY: {query}
18
+
19
+ TEMPORAL CONTEXT:
20
+ - Query intent: {temporal_intent} (the user {intent_explanation})
21
+ - Temporal urgency: {temporal_urgency:.0%} (how important freshness is)
22
+
23
+ SEARCH RESULTS:
24
+ {formatted_results}
25
+
26
+ INSTRUCTIONS:
27
+ 1. Synthesize a comprehensive answer based on the search results
28
+ 2. ALWAYS cite your sources using [1], [2], etc. format
29
+ 3. If the query requires current information, prioritize the most recent results
30
+ 4. If there are conflicting dates or versions mentioned, use the most recent accurate information
31
+ 5. Be concise but thorough
32
+ 6. If information seems outdated compared to current date ({current_date}), note this
33
+ 7. Write in the same language as the query
34
+
35
+ Generate your answer:"""
36
+
37
+
38
async def synthesize_answer(
    query: str,
    results: list[SearchResult],
    temporal_context: Optional[TemporalContext] = None,
) -> tuple[str, list[Citation]]:
    """
    Synthesize an answer from search results.

    Builds the synthesis prompt from the top 10 results plus the temporal
    context, asks the configured LLM for an answer, and pairs it with a
    citation list matching the [n] markers in the prompt.

    Args:
        query: Original search query
        results: List of search results to synthesize from
        temporal_context: Temporal analysis context

    Returns:
        Tuple of (answer_text, citations_list)
    """
    # Guard clause: nothing to synthesize from.
    if not results:
        return "No results found to synthesize an answer.", []

    # Only the top 10 results feed both the prompt and the citation list,
    # so citation indices always line up with the prompt's [n] markers.
    top_hits = results[:10]

    # Temporal defaults, overridden by the supplied context when present.
    today = datetime.now().strftime("%Y-%m-%d")
    intent = "neutral"
    urgency = 0.5
    if temporal_context:
        intent = temporal_context.query_temporal_intent
        urgency = temporal_context.temporal_urgency
        today = temporal_context.current_date

    # Human-readable gloss of the detected intent, interpolated into the prompt.
    explanations = {
        "current": "is looking for the most recent/current information",
        "historical": "is interested in historical or background information",
        "neutral": "has no specific temporal preference",
    }

    prompt = SYNTHESIS_PROMPT.format(
        current_date=today,
        query=query,
        temporal_intent=intent,
        intent_explanation=explanations.get(intent, ""),
        temporal_urgency=urgency,
        formatted_results=format_results_for_prompt(top_hits),
    )

    chat = [
        {"role": "system", "content": "You are a helpful research assistant."},
        {"role": "user", "content": prompt},
    ]

    try:
        answer = await generate_completion(chat, temperature=0.3)
    except Exception as e:
        # Best-effort fallback: surface the failure in the answer text
        # instead of failing the whole request.
        answer = f"Error generating synthesis: {e}. Please review the search results directly."

    citations = [
        Citation(index=pos, url=hit.url, title=hit.title)
        for pos, hit in enumerate(top_hits, 1)
    ]

    return answer, citations
109
+
110
+
111
def format_results_for_prompt(results: list[SearchResult]) -> str:
    """Format search results for inclusion in the LLM prompt.

    Each result becomes a numbered entry ([1], [2], ...) with title,
    optional publication date, URL, scoring metadata, and a content
    snippet truncated to 500 characters; entries are blank-line separated.
    """
    entries = []

    for rank, hit in enumerate(results, 1):
        published = (
            f" (Published: {hit.published_date.strftime('%Y-%m-%d')})"
            if hit.published_date
            else ""
        )

        entries.append(
            f"[{rank}] {hit.title}{published}\n"
            f"   URL: {hit.url}\n"
            f"   Freshness: {hit.freshness_score:.0%} | Authority: {hit.authority_score:.0%}\n"
            f"   Content: {hit.content[:500]}..."
        )

    return "\n\n".join(entries)
app/api/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """API routes package."""
app/api/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (183 Bytes). View file
 
app/api/__pycache__/schemas.cpython-311.pyc ADDED
Binary file (5.92 kB). View file
 
app/api/routes/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """API routes package."""
app/api/routes/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (190 Bytes). View file
 
app/api/routes/__pycache__/search.cpython-311.pyc ADDED
Binary file (5.54 kB). View file
 
app/api/routes/search.py ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Search API routes."""
2
+
3
+ import time
4
+ from datetime import datetime
5
+
6
+ from fastapi import APIRouter, HTTPException
7
+
8
+ from app.api.schemas import (
9
+ SearchRequest,
10
+ SearchResponse,
11
+ SearchResult,
12
+ TemporalContext,
13
+ Citation,
14
+ ErrorResponse,
15
+ )
16
+ from app.config import get_settings
17
+ from app.temporal.intent_detector import detect_temporal_intent
18
+ from app.temporal.freshness_scorer import calculate_freshness_score
19
+ from app.sources.tavily import search_tavily
20
+ from app.sources.duckduckgo import search_duckduckgo
21
+ from app.reranking.pipeline import rerank_results
22
+ from app.agents.synthesizer import synthesize_answer
23
+
24
+ router = APIRouter()
25
+
26
+
27
@router.post(
    "/search",
    response_model=SearchResponse,
    responses={500: {"model": ErrorResponse}},
    summary="Search with AI synthesis",
    description="Perform a search with temporal intelligence and return an AI-synthesized answer.",
)
async def search(request: SearchRequest) -> SearchResponse:
    """
    Perform an intelligent search with:
    - Temporal intent detection
    - Multi-source search
    - Multi-stage reranking
    - AI-powered answer synthesis

    Raises:
        HTTPException: 500 wrapping any unexpected internal error.
    """
    start_time = time.perf_counter()
    settings = get_settings()

    try:
        # Step 1: Analyze temporal intent
        temporal_intent, temporal_urgency = detect_temporal_intent(request.query)

        temporal_context = TemporalContext(
            query_temporal_intent=temporal_intent,
            temporal_urgency=temporal_urgency,
            current_date=datetime.now().strftime("%Y-%m-%d"),
        )

        # Step 2: Search multiple sources
        raw_results = []

        # Try Tavily first (best quality)
        if settings.tavily_api_key:
            tavily_results = await search_tavily(
                query=request.query,
                max_results=settings.max_search_results,
                freshness=request.freshness,
                include_domains=request.include_domains,
                exclude_domains=request.exclude_domains,
            )
            raw_results.extend(tavily_results)

        # Fallback to DuckDuckGo if needed
        if not raw_results:
            ddg_results = await search_duckduckgo(
                query=request.query,
                max_results=settings.max_search_results,
            )
            raw_results.extend(ddg_results)

        # No source returned anything: respond with an empty (but valid) payload.
        if not raw_results:
            return SearchResponse(
                query=request.query,
                answer="No results found for your query.",
                results=[],
                citations=[],
                temporal_context=temporal_context,
                processing_time_ms=(time.perf_counter() - start_time) * 1000,
            )

        # Step 3: Apply multi-stage reranking
        ranked_results = await rerank_results(
            query=request.query,
            results=raw_results,
            temporal_urgency=temporal_urgency,
            max_results=request.max_results,
        )

        # Step 4: Convert to SearchResult models
        # (plain iteration — the enumerate index was never used)
        search_results = []
        for result in ranked_results:
            freshness = calculate_freshness_score(result.get("published_date"))
            search_results.append(
                SearchResult(
                    title=result.get("title", ""),
                    url=result.get("url", ""),
                    content=result.get("content", ""),
                    score=result.get("score", 0.5),
                    published_date=result.get("published_date"),
                    freshness_score=freshness,
                    authority_score=result.get("authority_score", 0.5),
                )
            )

        # Step 5: Synthesize answer (if requested)
        answer = None
        citations = []

        if request.include_answer and search_results:
            answer, citations = await synthesize_answer(
                query=request.query,
                results=search_results,
                temporal_context=temporal_context,
            )

        processing_time = (time.perf_counter() - start_time) * 1000

        return SearchResponse(
            query=request.query,
            answer=answer,
            results=search_results,
            citations=citations,
            temporal_context=temporal_context,
            processing_time_ms=processing_time,
        )

    except HTTPException:
        # Don't re-wrap deliberate HTTP errors as opaque 500s.
        raise
    except Exception as e:
        # Chain the original exception so the real traceback is preserved
        # for logging/debugging instead of being swallowed by the re-raise.
        raise HTTPException(status_code=500, detail=str(e)) from e
+
136
+
137
@router.post(
    "/search/raw",
    response_model=SearchResponse,
    summary="Search without synthesis",
    description="Perform a search and return raw results without AI synthesis (faster).",
)
async def search_raw(request: SearchRequest) -> SearchResponse:
    """Fast search without answer synthesis.

    Delegates to `search` after forcing `include_answer` off, so source
    selection, reranking, and temporal handling stay identical; only the
    LLM synthesis step is skipped.
    """
    # NOTE(review): mutates the incoming request model in place before
    # delegating — fine for a per-request FastAPI body, but worth knowing.
    request.include_answer = False
    return await search(request)
app/api/schemas.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Pydantic schemas for API request/response models."""
2
+
3
+ from datetime import datetime
4
+ from typing import Literal
5
+
6
+ from pydantic import BaseModel, Field
7
+
8
+
9
+ # === Request Models ===
10
+
11
class SearchRequest(BaseModel):
    """Search request payload.

    Declarative contract for the search endpoints; the ``Field`` metadata
    below feeds the generated OpenAPI schema and is part of the public API.
    """

    # Free-text query; length bounds keep downstream search/LLM calls sane.
    query: str = Field(..., min_length=1, max_length=1000, description="Search query")
    # Caps the final (post-rerank) result count.
    max_results: int = Field(default=10, ge=1, le=50, description="Maximum results to return")
    freshness: Literal["day", "week", "month", "year", "any"] = Field(
        default="any",
        description="Filter results by recency"
    )
    # Optional allow/deny domain lists, forwarded to the search backend.
    include_domains: list[str] | None = Field(
        default=None,
        description="Only include results from these domains"
    )
    exclude_domains: list[str] | None = Field(
        default=None,
        description="Exclude results from these domains"
    )
    # When False the LLM synthesis step is skipped entirely (faster).
    include_answer: bool = Field(
        default=True,
        description="Include AI-generated answer"
    )
32
+
33
+
34
+ # === Response Models ===
35
+
36
class Citation(BaseModel):
    """Citation reference for the answer.

    Links a numeric marker in the synthesized answer text back to the
    source result it was drawn from.
    """

    # 1-based so it lines up with [1]-style markers in the answer text.
    index: int = Field(..., description="Citation index (1-based)")
    url: str = Field(..., description="Source URL")
    title: str = Field(..., description="Source title")
42
+
43
+
44
class TemporalContext(BaseModel):
    """Temporal metadata about the search."""

    # Whether the query asks about "now", the past, or neither.
    query_temporal_intent: Literal["current", "historical", "neutral"] = Field(
        ...,
        description="Detected temporal intent of the query"
    )
    # Drives how strongly freshness is weighted during reranking.
    temporal_urgency: float = Field(
        ...,
        ge=0.0,
        le=1.0,
        description="How important freshness is for this query (0-1)"
    )
    # Echoed back so clients can interpret relative dates in the answer.
    current_date: str = Field(..., description="Current date for context")
58
+
59
+
60
class SearchResult(BaseModel):
    """Individual search result."""

    title: str = Field(..., description="Result title")
    url: str = Field(..., description="Result URL")
    content: str = Field(..., description="Result content/snippet")
    # Combined relevance after reranking (freshness/authority blended in).
    score: float = Field(..., ge=0.0, le=1.0, description="Overall relevance score")
    published_date: datetime | None = Field(
        default=None,
        description="Publication date if available"
    )
    # 0.5 is the neutral default used when no publication date is known.
    freshness_score: float = Field(
        default=0.5,
        ge=0.0,
        le=1.0,
        description="How fresh/recent the content is"
    )
    # 0.5 is the neutral default for unrecognized domains.
    authority_score: float = Field(
        default=0.5,
        ge=0.0,
        le=1.0,
        description="Domain authority/trust score"
    )
83
+
84
+
85
class SearchResponse(BaseModel):
    """Complete search response."""

    query: str = Field(..., description="Original query")
    # None when the caller disabled synthesis or no results were found.
    answer: str | None = Field(
        default=None,
        description="AI-generated answer synthesized from results"
    )
    results: list[SearchResult] = Field(
        default_factory=list,
        description="Ranked search results"
    )
    # Empty when no answer was synthesized.
    citations: list[Citation] = Field(
        default_factory=list,
        description="Citations referenced in the answer"
    )
    temporal_context: TemporalContext | None = Field(
        default=None,
        description="Temporal analysis metadata"
    )
    # Wall-clock time of the whole request pipeline, in milliseconds.
    processing_time_ms: float = Field(..., description="Total processing time in milliseconds")
106
+
107
+
108
class ErrorResponse(BaseModel):
    """Error response model.

    NOTE(review): not referenced by the routes visible in this commit
    (errors are raised via HTTPException); confirm it is wired into the
    route ``responses=`` documentation somewhere, or it is dead weight.
    """

    error: str = Field(..., description="Error message")
    detail: str | None = Field(default=None, description="Detailed error information")
app/config.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Application configuration using pydantic-settings."""
2
+
3
+ from functools import lru_cache
4
+ from typing import Literal
5
+
6
+ from pydantic_settings import BaseSettings, SettingsConfigDict
7
+
8
+
9
class Settings(BaseSettings):
    """Application settings loaded from environment variables.

    Values come from the process environment and an optional ``.env`` file;
    unrecognized variables are silently ignored (``extra="ignore"``).
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        extra="ignore",
    )

    # API Keys - Search Sources
    # Empty string (not None) means "not configured"; callers test truthiness.
    tavily_api_key: str = ""
    serper_api_key: str | None = None

    # API Keys - LLM Providers
    groq_api_key: str | None = None
    openrouter_api_key: str | None = None

    # LLM Configuration
    llm_provider: Literal["groq", "openrouter"] = "groq"
    llm_model: str = "llama-3.3-70b-versatile"

    # Reranking Models (the MVP pipeline currently skips the encoder stages)
    bi_encoder_model: str = "BAAI/bge-small-en-v1.5"
    cross_encoder_model: str = "BAAI/bge-reranker-base"

    # Temporal Settings
    default_freshness_half_life: int = 30  # days

    # API Settings
    max_search_results: int = 20  # raw results fetched from sources
    max_final_results: int = 10  # results returned after reranking

    @property
    def llm_api_key(self) -> str:
        """Get the appropriate API key based on provider.

        Returns an empty string (never None) when the selected provider's
        key is unset, so callers can simply test truthiness.
        """
        if self.llm_provider == "groq":
            return self.groq_api_key or ""
        return self.openrouter_api_key or ""
47
+
48
+
49
@lru_cache
def get_settings() -> Settings:
    """Return the process-wide Settings singleton.

    ``lru_cache`` on a zero-argument function makes this a cheap memoized
    accessor: the environment/.env file is read once, on first call.
    """
    loaded = Settings()
    return loaded
app/main.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Lancer API - Main FastAPI application."""
2
+
3
+ from contextlib import asynccontextmanager
4
+ from datetime import datetime
5
+
6
+ from fastapi import FastAPI
7
+ from fastapi.middleware.cors import CORSMiddleware
8
+
9
+ from app.api.routes import search
10
+ from app.config import get_settings
11
+
12
+
13
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan events.

    Logs the active LLM configuration at startup (useful for quickly
    diagnosing misconfigured deployments) and a notice at shutdown.
    """
    # Startup
    settings = get_settings()
    # Plain literal: the old f-string had no placeholders (lint F541).
    print("🚀 Lancer API starting...")
    print(f"   LLM Provider: {settings.llm_provider}")
    print(f"   LLM Model: {settings.llm_model}")
    yield
    # Shutdown
    print("👋 Lancer API shutting down...")
24
+
25
+
26
# Application object; startup/shutdown logging is handled by `lifespan`.
app = FastAPI(
    title="Lancer Search API",
    description="Advanced AI-powered search API with temporal intelligence",
    version="0.1.0",
    lifespan=lifespan,
)

# CORS middleware
# NOTE(review): allow_origins=["*"] combined with allow_credentials=True
# conflicts with the CORS spec — browsers will not accept a wildcard origin
# on credentialed requests. Tighten origins or drop credentials before
# production.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Include routers — all search endpoints are versioned under /api/v1.
app.include_router(search.router, prefix="/api/v1", tags=["search"])
44
+
45
+
46
@app.get("/health")
async def health_check():
    """Liveness probe: reports service status, current time, and version."""
    status_report = {
        "status": "healthy",
        "timestamp": datetime.now().isoformat(),
        "version": "0.1.0",
    }
    return status_report
54
+
55
+
56
@app.get("/")
async def root():
    """Landing endpoint pointing clients at the docs and health probe."""
    service_info = {
        "name": "Lancer Search API",
        "version": "0.1.0",
        "docs": "/docs",
        "health": "/health",
    }
    return service_info
app/reranking/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Reranking module."""
app/reranking/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (187 Bytes). View file
 
app/reranking/__pycache__/authority_scorer.cpython-311.pyc ADDED
Binary file (4.48 kB). View file
 
app/reranking/__pycache__/pipeline.cpython-311.pyc ADDED
Binary file (3.55 kB). View file
 
app/reranking/authority_scorer.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Domain authority scoring.
2
+
3
+ Assigns trust/authority scores to domains based on known reliable sources.
4
+ """
5
+
6
+ from urllib.parse import urlparse
7
+
8
+
9
+ # High authority domains (trusted sources)
10
# High authority domains (trusted sources).
# Keys starting with "." are TLD/suffix rules; all others are exact domains
# that also match their subdomains (e.g. "gist.github.com" -> "github.com").
HIGH_AUTHORITY_DOMAINS = {
    # Academic & Research
    ".edu": 0.9,
    ".gov": 0.9,
    ".ac.uk": 0.85,

    # Major tech companies
    "github.com": 0.8,
    "stackoverflow.com": 0.8,
    "docs.python.org": 0.85,
    "developer.mozilla.org": 0.85,
    "arxiv.org": 0.9,

    # Major news sources
    "reuters.com": 0.8,
    "bbc.com": 0.75,
    "nytimes.com": 0.75,
    "theguardian.com": 0.75,

    # Reference
    "wikipedia.org": 0.7,
    "britannica.com": 0.8,

    # AI/ML specific
    "openai.com": 0.85,
    "anthropic.com": 0.85,
    "huggingface.co": 0.8,
    "deepmind.google": 0.85,
    "ai.meta.com": 0.8,

    # Tech publications
    "techcrunch.com": 0.7,
    "wired.com": 0.7,
    "arstechnica.com": 0.75,
    "theverge.com": 0.7,
}

# Low authority patterns (less reliable, largely user-generated content)
LOW_AUTHORITY_PATTERNS = [
    "medium.com",  # User-generated, variable quality
    "reddit.com",  # Forum, variable quality
    "quora.com",  # Q&A, variable quality
    "blogspot.com",
    "wordpress.com",
    "tumblr.com",
]


def calculate_authority_score(url: str) -> float:
    """
    Calculate domain authority score for a URL.

    Args:
        url: The URL to score

    Returns:
        Authority score between 0.0 and 1.0 (0.5 = neutral/unknown)
    """
    if not url:
        return 0.5

    try:
        domain = urlparse(url).netloc.lower()

        # Remove www. prefix
        if domain.startswith("www."):
            domain = domain[4:]

        # Known domains: match the domain itself or any of its subdomains.
        # FIX: the previous bare endswith() check matched unrelated domains
        # that merely shared a suffix (e.g. "notgithub.com" scored as
        # "github.com"); subdomain matches now require a "." boundary.
        # This single pass also replaces the old second TLD loop, which was
        # dead code (the first loop already matched ".edu"/".gov" suffixes).
        for known_domain, score in HIGH_AUTHORITY_DOMAINS.items():
            if known_domain.startswith("."):
                # TLD/suffix rule (.edu, .gov, .ac.uk)
                if domain.endswith(known_domain):
                    return score
            elif domain == known_domain or domain.endswith("." + known_domain):
                return score

        # Check for low authority patterns
        for pattern in LOW_AUTHORITY_PATTERNS:
            if pattern in domain:
                return 0.4

        # Default score for unknown domains
        return 0.5

    except Exception:
        # Malformed input / urlparse failure: fall back to neutral.
        return 0.5
99
+
100
+
101
def get_domain_category(url: str) -> str:
    """
    Get a category label for the domain.

    Args:
        url: The URL to categorize

    Returns:
        Category string like "Academic", "News", "Tech", etc.
    """
    if not url:
        return "Unknown"

    try:
        host = urlparse(url).netloc.lower()

        # Ordered substring rules: the first matching bucket wins.
        if any(marker in host for marker in (".edu", ".ac.uk", "arxiv")):
            return "Academic"
        if ".gov" in host:
            return "Government"
        if any(marker in host for marker in ("github", "stackoverflow", "docs.")):
            return "Developer"
        if any(marker in host for marker in ("reuters", "bbc", "nytimes", "cnn", "guardian")):
            return "News"
        if any(marker in host for marker in ("openai", "anthropic", "huggingface", "deepmind")):
            return "AI/ML"
        if "wikipedia" in host:
            return "Reference"
        return "General"

    except Exception:
        return "Unknown"
app/reranking/pipeline.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Multi-stage reranking pipeline.
2
+
3
+ Implements a 3-stage reranking approach:
4
+ 1. Bi-Encoder: Fast semantic similarity (optional, for large result sets)
5
+ 2. Cross-Encoder: Accurate relevance scoring
6
+ 3. Temporal + Authority: Freshness and domain trust weighting
7
+ """
8
+
9
+ from typing import Optional
10
+
11
+ from app.temporal.freshness_scorer import calculate_freshness_score, adjust_score_by_freshness
12
+ from app.reranking.authority_scorer import calculate_authority_score
13
+
14
+
15
async def rerank_results(
    query: str,
    results: list[dict],
    temporal_urgency: float = 0.5,
    max_results: int = 10,
) -> list[dict]:
    """
    Apply multi-stage reranking to search results.

    MVP pipeline: annotate each result in place with freshness and
    authority scores, blend them into the source relevance score, then
    sort best-first. The bi-encoder and cross-encoder stages are
    deliberately skipped for now.

    Args:
        query: Original search query (unused by the MVP scoring stages)
        results: Raw search results (mutated: score keys are added/updated)
        temporal_urgency: How important freshness is (0-1)
        max_results: Maximum results to return

    Returns:
        Reranked results with updated scores
    """
    if not results:
        return []

    # Stages 1 & 2 (bi-encoder filtering, cross-encoder scoring) are
    # skipped in the MVP; see rerank_with_embeddings for the planned path.

    # Stage 3: temporal + authority scoring.
    for entry in results:
        freshness = calculate_freshness_score(entry.get("published_date"))
        authority = calculate_authority_score(entry.get("url", ""))
        entry["freshness_score"] = freshness
        entry["authority_score"] = authority

        # Blend freshness into the source relevance, weighted by urgency...
        blended = adjust_score_by_freshness(
            base_score=entry.get("score", 0.5),
            freshness_score=freshness,
            temporal_urgency=temporal_urgency,
        )
        # ...then give domain authority a fixed 10% weight.
        entry["score"] = (blended * 0.9) + (authority * 0.1)

    # Best-first ordering, truncated to the requested size.
    ranked = sorted(results, key=lambda e: e["score"], reverse=True)
    return ranked[:max_results]
81
+
82
+
83
async def rerank_with_embeddings(
    query: str,
    results: list[dict],
    max_results: int = 10,
) -> list[dict]:
    """
    Full reranking with embedding models (placeholder).

    TODO: Implement when adding sentence-transformers support:
    1. Use bi-encoder for fast filtering
    2. Use cross-encoder for precise scoring

    Until then this simply orders results by their existing source score,
    treating a missing "score" key as 0.
    """
    by_score_desc = sorted(results, key=lambda entry: entry.get("score", 0), reverse=True)
    return by_score_desc[:max_results]
app/sources/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Search sources module."""
app/sources/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (190 Bytes). View file
 
app/sources/__pycache__/duckduckgo.cpython-311.pyc ADDED
Binary file (3.69 kB). View file
 
app/sources/__pycache__/tavily.cpython-311.pyc ADDED
Binary file (4.15 kB). View file
 
app/sources/duckduckgo.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """DuckDuckGo search source (free fallback).
2
+
3
+ Uses the duckduckgo_search library for free web search.
4
+ """
5
+
6
+ from datetime import datetime, timedelta
7
+ from typing import Optional
8
+
9
+ import httpx
10
+
11
+
12
async def search_duckduckgo(
    query: str,
    max_results: int = 10,
    region: str = "wt-wt",  # Worldwide
) -> list[dict]:
    """
    Search using DuckDuckGo (free, no API key required).

    Fallback source for when other providers are unavailable. Fetches the
    lightweight DuckDuckGo Lite HTML page (no JavaScript) and scrapes it.

    Args:
        query: Search query
        max_results: Maximum results to return
        region: Region code

    Returns:
        List of result dicts with title, url, content
    """
    query_params = {
        "q": query,
        "kl": region,
        "kp": "-1",  # Safe search off
    }
    request_headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
    }

    try:
        async with httpx.AsyncClient(timeout=15.0) as client:
            # DuckDuckGo Lite serves plain HTML, which keeps parsing simple.
            response = await client.get(
                "https://lite.duckduckgo.com/lite/",
                params=query_params,
                headers=request_headers,
                follow_redirects=True,
            )
            response.raise_for_status()
            page_html = response.text

        return parse_ddg_lite_results(page_html, max_results)

    except Exception as e:
        # Best-effort fallback source: log and return nothing on any failure.
        print(f"DuckDuckGo search error: {e}")
        return []
61
+
62
+
63
def parse_ddg_lite_results(html: str, max_results: int) -> list[dict]:
    """
    Parse DuckDuckGo Lite HTML results.

    Scrapes anchors carrying class="result-link" and pairs each, by
    position, with the class="result-snippet" table cells.

    Args:
        html: Raw HTML of the DuckDuckGo Lite results page
        max_results: Maximum number of results to return

    Returns:
        List of result dicts (title, url, content, ...) ready for reranking
    """
    import re
    from html import unescape

    results: list[dict] = []

    # Match whole result anchors, capturing the attribute blob and the text.
    # FIX: the previous pattern required `class=` to appear *before* `href=`,
    # but DDG Lite emits <a rel="nofollow" href="..." class='result-link'>
    # (href first), so it never matched anything. Matching the attributes
    # order-independently fixes the parser.
    anchor_pattern = r"<a\b([^>]*)>([^<]+)</a>"
    href_pattern = r'href=["\']([^"\']+)["\']'

    # Snippets live in <td class="result-snippet"> cells, one per result.
    snippet_pattern = r'<td[^>]*class=["\']result-snippet["\'][^>]*>([^<]+)</td>'
    snippets = re.findall(snippet_pattern, html, re.IGNORECASE)

    links: list[tuple[str, str]] = []
    for attrs, text in re.findall(anchor_pattern, html, re.IGNORECASE):
        if "result-link" not in attrs:
            continue
        href_match = re.search(href_pattern, attrs, re.IGNORECASE)
        if href_match:
            links.append((href_match.group(1), text))

    for i, (url, title) in enumerate(links[:max_results]):
        content = snippets[i] if i < len(snippets) else ""

        # Clean up whitespace and decode HTML entities (&amp; -> &, etc.).
        # FIX: entities were previously left encoded despite the comment.
        title = unescape(title.strip())
        content = unescape(content.strip())

        # Skip DuckDuckGo internal links
        if "duckduckgo.com" in url:
            continue

        results.append({
            "title": title,
            "url": url,
            "content": content,
            "published_date": None,  # DDG Lite doesn't provide dates
            "score": 0.5,  # Neutral score, will be reranked
            "source": "duckduckgo",
        })

    return results[:max_results]
app/sources/tavily.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Tavily search source integration.
2
+
3
+ Tavily provides high-quality, AI-optimized search results.
4
+ """
5
+
6
+ from datetime import datetime
7
+ from typing import Literal, Optional
8
+
9
+ import httpx
10
+
11
+ from app.config import get_settings
12
+
13
+
14
async def search_tavily(
    query: str,
    max_results: int = 10,
    freshness: Literal["day", "week", "month", "year", "any"] = "any",
    include_domains: Optional[list[str]] = None,
    exclude_domains: Optional[list[str]] = None,
    search_depth: Literal["basic", "advanced"] = "advanced",
) -> list[dict]:
    """
    Search using Tavily API.

    Builds the request payload, POSTs it to Tavily, and normalizes the
    response into the internal result-dict shape. Returns an empty list
    when no API key is configured or on any request error.

    Args:
        query: Search query
        max_results: Maximum results to return
        freshness: Filter by recency
        include_domains: Only include these domains
        exclude_domains: Exclude these domains
        search_depth: "basic" (fast) or "advanced" (thorough)

    Returns:
        List of result dicts with title, url, content, published_date, score
    """
    settings = get_settings()
    if not settings.tavily_api_key:
        return []

    # Tavily expresses recency as a look-back window in days; None = no filter.
    recency_days = {"day": 1, "week": 7, "month": 30, "year": 365, "any": None}.get(freshness)

    payload: dict = {
        "api_key": settings.tavily_api_key,
        "query": query,
        "search_depth": search_depth,
        "max_results": max_results,
        "include_answer": False,
        "include_raw_content": False,
    }
    if recency_days:
        payload["days"] = recency_days
    if include_domains:
        payload["include_domains"] = include_domains
    if exclude_domains:
        payload["exclude_domains"] = exclude_domains

    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.post(
                "https://api.tavily.com/search",
                json=payload,
            )
            response.raise_for_status()
            data = response.json()

        normalized = []
        for item in data.get("results", []):
            # Tavily dates are ISO 8601, sometimes with a trailing "Z".
            published = None
            raw_date = item.get("published_date")
            if raw_date:
                try:
                    published = datetime.fromisoformat(raw_date.replace("Z", "+00:00"))
                except (ValueError, TypeError):
                    published = None

            normalized.append({
                "title": item.get("title", ""),
                "url": item.get("url", ""),
                "content": item.get("content", ""),
                "published_date": published,
                "score": item.get("score", 0.5),
                "source": "tavily",
            })

        return normalized

    except httpx.HTTPError as e:
        print(f"Tavily search error: {e}")
        return []
    except Exception as e:
        print(f"Tavily unexpected error: {e}")
        return []
app/temporal/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Temporal intelligence module."""
app/temporal/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (198 Bytes). View file
 
app/temporal/__pycache__/freshness_scorer.cpython-311.pyc ADDED
Binary file (3.81 kB). View file
 
app/temporal/__pycache__/intent_detector.cpython-311.pyc ADDED
Binary file (3.01 kB). View file