Commit ·
a2e3bb6
1
Parent(s): b851773
Add DDGS API for Hugging Face
Browse files- .dockerignore +12 -0
- .gitignore +13 -0
- AGENTS.md +21 -0
- Dockerfile +14 -0
- README.md +94 -2
- app.py +94 -0
- ddgs_cli.py +206 -0
- pyproject.toml +24 -0
- resources.md +13 -0
- test_scripts/hf_upload_secrets.py +41 -0
- test_scripts/hf_upload_secrets.sh +23 -0
- test_scripts/run_smoke.sh +60 -0
- test_scripts/update_venv_cert.sh +48 -0
.dockerignore
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.env
|
| 2 |
+
.venv/
|
| 3 |
+
__pycache__/
|
| 4 |
+
*.pyc
|
| 5 |
+
*.pyo
|
| 6 |
+
*.pyd
|
| 7 |
+
*.egg-info/
|
| 8 |
+
.dist/
|
| 9 |
+
.build/
|
| 10 |
+
.vscode/
|
| 11 |
+
.idea/
|
| 12 |
+
certif_zscaler/
|
.gitignore
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.env
|
| 2 |
+
.venv/
|
| 3 |
+
__pycache__/
|
| 4 |
+
*.pyc
|
| 5 |
+
*.pyo
|
| 6 |
+
*.pyd
|
| 7 |
+
*.egg-info/
|
| 8 |
+
.dist/
|
| 9 |
+
.build/
|
| 10 |
+
.vscode/
|
| 11 |
+
.idea/
|
| 12 |
+
.ddgs_search_err
|
| 13 |
+
certif_zscaler/*
|
AGENTS.md
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Instructions
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
## General
|
| 5 |
+
|
| 6 |
+
Aunque el usuario te hable en español, toda la documentación, código, comentarios, interacción con el usuario, etc. será en inglés.
|
| 7 |
+
|
| 8 |
+
## Credenciales
|
| 9 |
+
|
| 10 |
+
Guardarlas siempre en un fichero .env en la carpeta raíz
|
| 11 |
+
Estará siempre incluido en el fichero .gitignore
|
| 12 |
+
|
| 13 |
+
## Testing
|
| 14 |
+
|
| 15 |
+
Cada vez que modifiques algo, modifica los scripts de test para comprobar que funciona.
|
| 16 |
+
Nunca des por terminada una tarea sin comprobar que funciona y no hay errores de pylance.
|
| 17 |
+
|
| 18 |
+
## Resources
|
| 19 |
+
|
| 20 |
+
Tienes la info en el fichero resources.md.
|
| 21 |
+
Si descubres algo nuevo, actualízalo ahí.
|
Dockerfile
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.12-slim
|
| 2 |
+
|
| 3 |
+
ENV PYTHONDONTWRITEBYTECODE=1 \
|
| 4 |
+
PYTHONUNBUFFERED=1
|
| 5 |
+
|
| 6 |
+
WORKDIR /app
|
| 7 |
+
|
| 8 |
+
COPY pyproject.toml README.md ddgs_cli.py app.py ./
|
| 9 |
+
RUN python -m pip install --no-cache-dir --upgrade pip \
|
| 10 |
+
&& python -m pip install --no-cache-dir .
|
| 11 |
+
|
| 12 |
+
EXPOSE 7860
|
| 13 |
+
|
| 14 |
+
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
|
README.md
CHANGED
|
@@ -1,10 +1,102 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
emoji: 👁
|
| 4 |
colorFrom: indigo
|
| 5 |
colorTo: red
|
| 6 |
sdk: docker
|
|
|
|
| 7 |
pinned: false
|
| 8 |
---
|
| 9 |
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: DDGS Search API
|
| 3 |
emoji: 👁
|
| 4 |
colorFrom: indigo
|
| 5 |
colorTo: red
|
| 6 |
sdk: docker
|
| 7 |
+
app_port: 7860
|
| 8 |
pinned: false
|
| 9 |
---
|
| 10 |
|
| 11 |
+
# DDGS API CLI
|
| 12 |
+
|
| 13 |
+
CLI scaffold for DuckDuckGo Search (DDGS) text search.
|
| 14 |
+
|
| 15 |
+
## Quick start
|
| 16 |
+
|
| 17 |
+
```bash
|
| 18 |
+
python -m venv .venv
|
| 19 |
+
. .venv/bin/activate
|
| 20 |
+
pip install -e .
|
| 21 |
+
|
| 22 |
+
# edit .env if needed
|
| 23 |
+
|
| 24 |
+
# run
|
| 25 |
+
ddgs-search "openai"
|
| 26 |
+
```
|
| 27 |
+
|
| 28 |
+
## Usage
|
| 29 |
+
|
| 30 |
+
```bash
|
| 31 |
+
ddgs-search --help
|
| 32 |
+
```
|
| 33 |
+
|
| 34 |
+
Example:
|
| 35 |
+
|
| 36 |
+
```bash
|
| 37 |
+
ddgs-search "site:openai.com safety" --region us-en --safesearch moderate --timelimit m --max-results 5 --format json
|
| 38 |
+
```
|
| 39 |
+
|
| 40 |
+
`--format json` and `--format jsonl` emit the full result objects returned by DDGS (all available fields).
|
| 41 |
+
|
| 42 |
+
## API (FastAPI)
|
| 43 |
+
|
| 44 |
+
Run locally:
|
| 45 |
+
|
| 46 |
+
```bash
|
| 47 |
+
uvicorn app:app --host 0.0.0.0 --port 7860
|
| 48 |
+
```
|
| 49 |
+
|
| 50 |
+
Local verification:
|
| 51 |
+
|
| 52 |
+
```bash
|
| 53 |
+
uvicorn app:app --host 127.0.0.1 --port 7860
|
| 54 |
+
curl -X POST http://127.0.0.1:7860/search \
|
| 55 |
+
-H "Authorization: Bearer $API_BEARER_TOKEN" \
|
| 56 |
+
-H "Content-Type: application/json" \
|
| 57 |
+
-d '{"query":"openai","max_results":1,"region":"us-en","safesearch":"moderate","timelimit":"m","backend":"auto","timeout":30,"verify":true}'
|
| 58 |
+
```
|
| 59 |
+
|
| 60 |
+
Request:
|
| 61 |
+
|
| 62 |
+
```bash
|
| 63 |
+
curl -X POST http://localhost:7860/search \
|
| 64 |
+
-H "Authorization: Bearer $API_BEARER_TOKEN" \
|
| 65 |
+
-H "Content-Type: application/json" \
|
| 66 |
+
-d '{"query":"openai","max_results":5,"region":"us-en","safesearch":"moderate","timelimit":"m","backend":"auto","timeout":30,"verify":true}'
|
| 67 |
+
```
|
| 68 |
+
|
| 69 |
+
## Hugging Face (Docker)
|
| 70 |
+
|
| 71 |
+
1) Ensure `.env` has `HF_TOKEN`, `HF_SPACE_ID`, and `API_BEARER_TOKEN`.
|
| 72 |
+
2) Upload secrets to the Space:
|
| 73 |
+
|
| 74 |
+
```bash
|
| 75 |
+
test_scripts/hf_upload_secrets.sh
|
| 76 |
+
```
|
| 77 |
+
|
| 78 |
+
3) Build and run the Docker image (HF Spaces will do this automatically on push):
|
| 79 |
+
|
| 80 |
+
```bash
|
| 81 |
+
docker build -t ddgs-api .
|
| 82 |
+
docker run -p 7860:7860 --env-file .env ddgs-api
|
| 83 |
+
```
|
| 84 |
+
|
| 85 |
+
Note: `certif_zscaler/` is excluded from the Docker build context via `.dockerignore`. Zscaler is only used locally when `USE_LOCAL_ZSCALER_CERT=true`. HF deployments will not load Zscaler certs.
|
| 86 |
+
|
| 87 |
+
## Environment variables
|
| 88 |
+
|
| 89 |
+
- `DDGS_REGION`
|
| 90 |
+
- `DDGS_SAFESEARCH`
|
| 91 |
+
- `DDGS_TIMELIMIT`
|
| 92 |
+
- `DDGS_MAX_RESULTS`
|
| 93 |
+
- `DDGS_BACKEND`
|
| 94 |
+
- `DDGS_PROXY`
|
| 95 |
+
- `DDGS_TIMEOUT`
|
| 96 |
+
- `DDGS_OUTPUT`
|
| 97 |
+
- `DDGS_VERIFY`
|
| 98 |
+
- `HF_TOKEN`
|
| 99 |
+
- `HF_SPACE_ID`
|
| 100 |
+
- `API_BEARER_TOKEN`
|
| 101 |
+
- `CERT_ZSCALER_PEM`
|
| 102 |
+
- `USE_LOCAL_ZSCALER_CERT`
|
app.py
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from pathlib import Path
|
| 3 |
+
from typing import Optional
|
| 4 |
+
|
| 5 |
+
import certifi
|
| 6 |
+
from dotenv import load_dotenv
|
| 7 |
+
from fastapi import Depends, FastAPI, Header, HTTPException, status
|
| 8 |
+
from pydantic import BaseModel, Field
|
| 9 |
+
|
| 10 |
+
from ddgs_cli import _env_int, _env_str, _load_env, ddgs_search
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def _ensure_ca_bundle() -> None:
|
| 14 |
+
if os.getenv("SSL_CERT_FILE") or os.getenv("REQUESTS_CA_BUNDLE"):
|
| 15 |
+
return
|
| 16 |
+
if os.getenv("HF_SPACE_ID") or os.getenv("SPACE_ID") or os.getenv("HF_SPACE"):
|
| 17 |
+
return
|
| 18 |
+
if os.getenv("USE_LOCAL_ZSCALER_CERT", "false").lower() != "true":
|
| 19 |
+
return
|
| 20 |
+
|
| 21 |
+
pem_inline = os.getenv("CERT_ZSCALER_PEM")
|
| 22 |
+
cert_bytes: bytes | None = None
|
| 23 |
+
if pem_inline:
|
| 24 |
+
cert_bytes = pem_inline.encode("utf-8")
|
| 25 |
+
else:
|
| 26 |
+
cert_path = Path(__file__).resolve().parent / "certif_zscaler" / "Zscaler_Root_CA.pem"
|
| 27 |
+
if cert_path.exists():
|
| 28 |
+
cert_bytes = cert_path.read_bytes()
|
| 29 |
+
else:
|
| 30 |
+
return
|
| 31 |
+
|
| 32 |
+
bundle_path = Path("/tmp/ca_bundle.pem")
|
| 33 |
+
certifi_path = Path(certifi.where())
|
| 34 |
+
bundle_path.write_bytes(certifi_path.read_bytes() + b"\n" + cert_bytes)
|
| 35 |
+
os.environ["SSL_CERT_FILE"] = str(bundle_path)
|
| 36 |
+
os.environ["REQUESTS_CA_BUNDLE"] = str(bundle_path)
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
_load_env()
|
| 40 |
+
_ensure_ca_bundle()
|
| 41 |
+
|
| 42 |
+
app = FastAPI(title="DDGS Search API", version="0.1.0")
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
def _require_auth(authorization: Optional[str] = Header(default=None)) -> None:
    """FastAPI dependency that enforces bearer-token authentication.

    The expected token is read from the ``API_BEARER_TOKEN`` environment
    variable on every call.

    Args:
        authorization: Raw ``Authorization`` request header, if present.

    Raises:
        HTTPException: 500 when ``API_BEARER_TOKEN`` is not configured;
            401 when the header is missing, does not start with ``"Bearer "``,
            or carries a token that does not match.
    """
    import hmac  # local import: only used for the constant-time comparison

    expected = os.getenv("API_BEARER_TOKEN")
    if not expected:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="API_BEARER_TOKEN is not configured",
        )

    unauthorized = HTTPException(
        status_code=status.HTTP_401_UNAUTHORIZED,
        detail="Unauthorized",
        headers={"WWW-Authenticate": "Bearer"},
    )
    if not authorization or not authorization.startswith("Bearer "):
        raise unauthorized

    token = authorization.split(" ", 1)[1]
    # Constant-time comparison so response timing does not reveal how much of
    # the token matched. Compare bytes: compare_digest rejects non-ASCII str.
    if not hmac.compare_digest(token.encode("utf-8"), expected.encode("utf-8")):
        raise unauthorized
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
def _default_region() -> str:
    """Region default: ``DDGS_REGION`` or ``us-en``."""
    return _env_str("DDGS_REGION", "us-en") or "us-en"


def _default_safesearch() -> str:
    """Safe-search default: ``DDGS_SAFESEARCH`` or ``moderate``."""
    return _env_str("DDGS_SAFESEARCH", "moderate") or "moderate"


def _default_timelimit() -> Optional[str]:
    """Time-limit default: ``DDGS_TIMELIMIT`` or ``None``."""
    return _env_str("DDGS_TIMELIMIT", None)


def _default_max_results() -> int:
    """Result-count default: ``DDGS_MAX_RESULTS`` or 10."""
    return _env_int("DDGS_MAX_RESULTS", 10) or 10


def _default_backend() -> Optional[str]:
    """Backend default: ``DDGS_BACKEND`` or ``auto``."""
    return _env_str("DDGS_BACKEND", "auto")


def _default_proxy() -> Optional[str]:
    """Proxy default: ``DDGS_PROXY`` or ``None``."""
    return _env_str("DDGS_PROXY", None)


def _default_timeout() -> int:
    """Timeout default: ``DDGS_TIMEOUT`` or 30."""
    return _env_int("DDGS_TIMEOUT", 30) or 30


def _default_verify() -> bool:
    """Verification default: true unless ``DDGS_VERIFY`` is ``false`` (any case)."""
    return _env_str("DDGS_VERIFY", "true").lower() != "false"


class SearchRequest(BaseModel):
    """Body of ``POST /search``.

    Only ``query`` is required. Every other field falls back to its
    ``DDGS_*`` environment variable, evaluated when the model is instantiated.
    """

    query: str = Field(..., min_length=1)
    region: str = Field(default_factory=_default_region)
    safesearch: str = Field(default_factory=_default_safesearch)
    timelimit: Optional[str] = Field(default_factory=_default_timelimit)
    max_results: int = Field(default_factory=_default_max_results, ge=1)
    backend: Optional[str] = Field(default_factory=_default_backend)
    proxy: Optional[str] = Field(default_factory=_default_proxy)
    timeout: int = Field(default_factory=_default_timeout, ge=1)
    verify: bool = Field(default_factory=_default_verify)
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
@app.get("/health")
def health() -> dict:
    """Health endpoint: always answers with a static ``ok`` status."""
    return dict(status="ok")
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
@app.get("/")
def root() -> dict:
    """Root endpoint: returns the same static ``ok`` status as ``/health``."""
    return dict(status="ok")
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
@app.post("/search")
def search(request: SearchRequest, _: None = Depends(_require_auth)) -> dict:
    """Run a DDGS text search for an authenticated caller.

    Args:
        request: Validated search parameters.
        _: Placeholder for the ``_require_auth`` dependency; its value is unused.

    Returns:
        A dict with the original ``query``, the number of results (``count``)
        and the ``results`` list as returned by ``ddgs_search``.

    Raises:
        HTTPException: 502 when ``ddgs_search`` raises. The exception is logged
            server-side and only its class name is sent to the client.
    """
    import logging  # local import: only used on the failure path

    try:
        results = ddgs_search(
            request.query,
            region=request.region,
            safesearch=request.safesearch,
            timelimit=request.timelimit,
            max_results=request.max_results,
            backend=request.backend,
            proxy=request.proxy,
            timeout=request.timeout,
            verify=request.verify,
        )
    except Exception as exc:  # noqa: BLE001 - any backend failure maps to 502
        # Keep the traceback in the server log; do not echo the message to the
        # client because it may contain proxy URLs or other internals.
        logging.getLogger(__name__).exception("DDGS search failed")
        raise HTTPException(
            status_code=status.HTTP_502_BAD_GATEWAY,
            detail=f"Search backend error: {type(exc).__name__}",
        ) from exc
    return {"query": request.query, "count": len(results), "results": results}
|
ddgs_cli.py
ADDED
|
@@ -0,0 +1,206 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import argparse
|
| 2 |
+
import inspect
|
| 3 |
+
import json
|
| 4 |
+
import os
|
| 5 |
+
import sys
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
|
| 8 |
+
from dotenv import load_dotenv
|
| 9 |
+
from ddgs import DDGS
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def _load_env() -> None:
|
| 13 |
+
cwd_env = Path.cwd() / ".env"
|
| 14 |
+
if cwd_env.exists():
|
| 15 |
+
load_dotenv(dotenv_path=cwd_env)
|
| 16 |
+
else:
|
| 17 |
+
repo_env = Path(__file__).resolve().parents[0] / ".env"
|
| 18 |
+
if repo_env.exists():
|
| 19 |
+
load_dotenv(dotenv_path=repo_env)
|
| 20 |
+
|
| 21 |
+
if os.getenv("DDGS_PROXY", "") == "":
|
| 22 |
+
os.environ.pop("DDGS_PROXY", None)
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def _env_int(name: str, fallback: int | None) -> int | None:
|
| 26 |
+
value = os.getenv(name)
|
| 27 |
+
if value is None or value == "":
|
| 28 |
+
return fallback
|
| 29 |
+
try:
|
| 30 |
+
return int(value)
|
| 31 |
+
except ValueError:
|
| 32 |
+
return fallback
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def _env_str(name: str, fallback: str | None) -> str | None:
|
| 36 |
+
value = os.getenv(name)
|
| 37 |
+
if value is None or value == "":
|
| 38 |
+
return fallback
|
| 39 |
+
return value
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def _build_parser() -> argparse.ArgumentParser:
    """Create the ``ddgs-search`` argument parser.

    Option defaults come from the ``DDGS_*`` environment variables as they are
    at the time this function is called, with hard-coded fallbacks.
    """
    verify_default = _env_str("DDGS_VERIFY", "true").lower() != "false"

    # (flags, add_argument keyword arguments), in the order shown by --help.
    argument_specs = [
        (("query",), {"nargs": "+", "help": "Search query text"}),
        (
            ("--region",),
            {
                "default": _env_str("DDGS_REGION", "us-en"),
                "help": "Region code (default: DDGS_REGION or us-en)",
            },
        ),
        (
            ("--safesearch",),
            {
                "choices": ["on", "moderate", "off"],
                "default": _env_str("DDGS_SAFESEARCH", "moderate"),
                "help": "Safe search level (default: DDGS_SAFESEARCH or moderate)",
            },
        ),
        (
            ("--timelimit",),
            {
                "choices": ["d", "w", "m", "y"],
                "default": _env_str("DDGS_TIMELIMIT", None),
                "help": "Time limit: d/w/m/y (default: DDGS_TIMELIMIT)",
            },
        ),
        (
            ("--max-results",),
            {
                "type": int,
                "default": _env_int("DDGS_MAX_RESULTS", 10),
                "help": "Maximum number of results (default: DDGS_MAX_RESULTS or 10)",
            },
        ),
        (
            ("--backend",),
            {
                "default": _env_str("DDGS_BACKEND", "auto"),
                "help": "Backend to use (default: DDGS_BACKEND or auto)",
            },
        ),
        (
            ("--proxy",),
            {
                "default": _env_str("DDGS_PROXY", None),
                "help": "Proxy URL (default: DDGS_PROXY)",
            },
        ),
        (
            ("--timeout",),
            {
                "type": int,
                "default": _env_int("DDGS_TIMEOUT", 30),
                "help": "Request timeout seconds (default: DDGS_TIMEOUT or 30)",
            },
        ),
        (
            ("--verify",),
            {
                "dest": "verify",
                "action": "store_true",
                "default": verify_default,
                "help": "Enable SSL verification (default: DDGS_VERIFY or true)",
            },
        ),
        (
            ("--no-verify",),
            {
                "dest": "verify",
                "action": "store_false",
                "help": "Disable SSL verification (not recommended)",
            },
        ),
        (
            ("--format",),
            {
                "choices": ["json", "jsonl", "text"],
                "default": _env_str("DDGS_OUTPUT", "json"),
                "help": "Output format: json, jsonl, text (default: DDGS_OUTPUT or json)",
            },
        ),
    ]

    parser = argparse.ArgumentParser(
        prog="ddgs-search",
        description="CLI scaffold for DuckDuckGo Search (DDGS) text search",
    )
    for flags, options in argument_specs:
        parser.add_argument(*flags, **options)
    return parser
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
def _print_text(results: list[dict]) -> None:
|
| 115 |
+
for idx, item in enumerate(results, start=1):
|
| 116 |
+
sys.stdout.write(f"[{idx}]\n")
|
| 117 |
+
for key in sorted(item.keys()):
|
| 118 |
+
value = item.get(key, "")
|
| 119 |
+
sys.stdout.write(f"{key}: {value}\n")
|
| 120 |
+
sys.stdout.write("\n")
|
| 121 |
+
|
| 122 |
+
|
| 123 |
+
def _print_json(results: list[dict], jsonl: bool) -> None:
|
| 124 |
+
if jsonl:
|
| 125 |
+
for item in results:
|
| 126 |
+
sys.stdout.write(json.dumps(item, ensure_ascii=True) + "\n")
|
| 127 |
+
return
|
| 128 |
+
sys.stdout.write(json.dumps(results, ensure_ascii=True, indent=2) + "\n")
|
| 129 |
+
|
| 130 |
+
|
| 131 |
+
def _resolve_verify(verify_flag: bool) -> bool | str:
|
| 132 |
+
if not verify_flag:
|
| 133 |
+
return False
|
| 134 |
+
ca_bundle = os.getenv("SSL_CERT_FILE") or os.getenv("REQUESTS_CA_BUNDLE")
|
| 135 |
+
return ca_bundle or True
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
def ddgs_search(
    query: str,
    *,
    region: str,
    safesearch: str,
    timelimit: str | None,
    max_results: int,
    backend: str | None,
    proxy: str | None,
    timeout: int,
    verify: bool,
) -> list[dict]:
    """Run a DDGS text search and return the results as a list.

    ``timelimit`` and ``backend`` are forwarded to ``DDGS.text`` only when
    truthy. ``proxy`` is passed to the ``DDGS`` constructor under whichever
    keyword (``proxies`` first, then ``proxy``) its signature accepts; if it
    accepts neither, the proxy is not passed at all.
    """
    text_options: dict[str, object] = {
        "region": region,
        "safesearch": safesearch,
        "max_results": max_results,
    }
    for option, value in (("timelimit", timelimit), ("backend", backend)):
        if value:
            text_options[option] = value

    client_options: dict[str, object] = {
        "timeout": timeout,
        "verify": _resolve_verify(verify),
    }
    if proxy:
        accepted = inspect.signature(DDGS).parameters
        proxy_keyword = next((kw for kw in ("proxies", "proxy") if kw in accepted), None)
        if proxy_keyword is not None:
            client_options[proxy_keyword] = proxy

    with DDGS(**client_options) as client:
        return list(client.text(query, **text_options))
|
| 170 |
+
|
| 171 |
+
|
| 172 |
+
def main(argv: list[str] | None = None) -> int:
    """CLI entry point.

    Loads the environment, parses ``argv``, runs the search and prints the
    results in the requested format.

    Returns:
        0 on success; 1 when the search raises (the error goes to stderr).
    """
    _load_env()
    args = _build_parser().parse_args(argv)
    joined_query = " ".join(args.query)

    try:
        results = ddgs_search(
            joined_query,
            region=args.region,
            safesearch=args.safesearch,
            timelimit=args.timelimit,
            max_results=args.max_results,
            backend=args.backend,
            proxy=args.proxy,
            timeout=args.timeout,
            verify=args.verify,
        )
    except Exception as exc:  # noqa: BLE001 - CLI should show all errors
        sys.stderr.write(f"Error: {exc}\n")
        return 1

    if args.format == "text":
        _print_text(results)
    else:
        # Everything that is not "text" is JSON; only "jsonl" is line-delimited.
        _print_json(results, jsonl=args.format == "jsonl")
    return 0
|
| 203 |
+
|
| 204 |
+
|
| 205 |
+
if __name__ == "__main__":
|
| 206 |
+
raise SystemExit(main())
|
pyproject.toml
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[project]
|
| 2 |
+
name = "ddgs-api-cli"
|
| 3 |
+
version = "0.1.0"
|
| 4 |
+
description = "CLI scaffold for DuckDuckGo Search (DDGS)"
|
| 5 |
+
readme = "README.md"
|
| 6 |
+
requires-python = ">=3.10"
|
| 7 |
+
dependencies = [
|
| 8 |
+
"ddgs",
|
| 9 |
+
"fastapi",
|
| 10 |
+
"uvicorn",
|
| 11 |
+
"certifi",
|
| 12 |
+
"huggingface_hub",
|
| 13 |
+
"python-dotenv",
|
| 14 |
+
]
|
| 15 |
+
|
| 16 |
+
[project.scripts]
|
| 17 |
+
ddgs-search = "ddgs_cli:main"
|
| 18 |
+
|
| 19 |
+
[tool.setuptools]
|
| 20 |
+
py-modules = ["ddgs_cli"]
|
| 21 |
+
|
| 22 |
+
[build-system]
|
| 23 |
+
requires = ["setuptools>=68"]
|
| 24 |
+
build-backend = "setuptools.build_meta"
|
resources.md
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Search Engine
|
| 2 |
+
|
| 3 |
+
## Duck Duck Go Search (DDGS)
|
| 4 |
+
URL Info: https://pypi.org/project/ddgs/
|
| 5 |
+
install: pip install ddgs
|
| 6 |
+
|
| 7 |
+
### Common parameters
|
| 8 |
+
- `DDGS()` supports `proxies` and `timeout`.
|
| 9 |
+
- `DDGS()` supports `verify` (bool or path to CA bundle).
|
| 10 |
+
- `DDGS.text()` commonly supports `region`, `safesearch`, `timelimit`, `backend`, `max_results`.
|
| 11 |
+
|
| 12 |
+
## Hugging Face Spaces
|
| 13 |
+
API docs: https://huggingface.co/docs/huggingface_hub/main/en/package_reference/hf_api#huggingface_hub.HfApi.add_space_secret
|
test_scripts/hf_upload_secrets.py
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import sys
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
|
| 5 |
+
from dotenv import load_dotenv
|
| 6 |
+
from huggingface_hub import HfApi
|
| 7 |
+
|
| 8 |
+
# Upload API_BEARER_TOKEN from the repository's .env file to the Hugging Face
# Space named by HF_SPACE_ID, authenticating with HF_TOKEN.

ROOT_DIR = Path(__file__).resolve().parent.parent
ENV_PATH = ROOT_DIR / ".env"

if ENV_PATH.exists():
    load_dotenv(dotenv_path=ENV_PATH)

hf_token = os.getenv("HF_TOKEN")
space_id = os.getenv("HF_SPACE_ID")
api_bearer_token = os.getenv("API_BEARER_TOKEN")
cert_zscaler_pem = os.getenv("CERT_ZSCALER_PEM")

# All three values are mandatory; report every missing one at once.
missing = []
for name, value in (
    ("HF_TOKEN", hf_token),
    ("HF_SPACE_ID", space_id),
    ("API_BEARER_TOKEN", api_bearer_token),
):
    if not value:
        missing.append(name)

if missing:
    # sys.exit(str) writes the message to stderr and exits with status 1.
    sys.exit(f"Missing required env vars in .env: {', '.join(missing)}")

# Abort rather than run with an inline Zscaler certificate configured.
if cert_zscaler_pem:
    sys.exit("CERT_ZSCALER_PEM is set. This script will not upload Zscaler certs to HF.")

api = HfApi(token=hf_token)
api.add_space_secret(
    repo_id=space_id,
    key="API_BEARER_TOKEN",
    value=api_bearer_token,
    description="Bearer token for DDGS Search API",
)

sys.stdout.write(f"Uploaded secret API_BEARER_TOKEN to {space_id}\n")
|
test_scripts/hf_upload_secrets.sh
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
# Upload the Space secrets from .env to Hugging Face.
#
# Creates (or reuses) the repo-local .venv, installs the project into it,
# optionally exports the CA bundle produced by update_venv_cert.sh, and then
# runs test_scripts/hf_upload_secrets.py.
set -euo pipefail

ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
VENV_DIR="$ROOT_DIR/.venv"
CERT_SCRIPT="$ROOT_DIR/test_scripts/update_venv_cert.sh"

python3 -m venv "$VENV_DIR"
# shellcheck disable=SC1091
source "$VENV_DIR/bin/activate"

python -m pip install --upgrade pip >/dev/null
python -m pip install -e "$ROOT_DIR" >/dev/null

# The extra CA bundle is optional. Run the helper inside an `if` so that a
# non-zero exit (e.g. no local certificate) does not abort this script under
# `set -e`, and parse its output with sed, which exits 0 even with no match.
if [[ -x "$CERT_SCRIPT" ]]; then
  if CERT_OUT=$("$CERT_SCRIPT"); then
    CERT_BUNDLE=$(printf '%s\n' "$CERT_OUT" | sed -n 's/^CERT_BUNDLE=//p' | tail -n 1)
    if [[ -n "$CERT_BUNDLE" ]]; then
      export SSL_CERT_FILE="$CERT_BUNDLE"
      export REQUESTS_CA_BUNDLE="$CERT_BUNDLE"
    fi
  else
    echo "WARN: update_venv_cert.sh failed; continuing with the default CA bundle" >&2
  fi
fi

python "$ROOT_DIR/test_scripts/hf_upload_secrets.py"
|
test_scripts/run_smoke.sh
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
# Smoke test for the ddgs-search CLI.
#
# Creates (or reuses) the repo-local .venv, installs the project, checks that
# `ddgs-search --help` runs, then performs one live search and validates that
# the output is a JSON list. Known network/certificate failures are reported
# as SKIP and exit 0.
set -euo pipefail

ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
VENV_DIR="$ROOT_DIR/.venv"
CERT_SCRIPT="$ROOT_DIR/test_scripts/update_venv_cert.sh"

python3 -m venv "$VENV_DIR"
# shellcheck disable=SC1091
source "$VENV_DIR/bin/activate"

python -m pip install --upgrade pip >/dev/null
python -m pip install -e "$ROOT_DIR" >/dev/null

# The extra CA bundle is optional. Run the helper inside an `if` so that a
# non-zero exit (e.g. no local certificate) does not abort this script under
# `set -e`, and parse its output with sed, which exits 0 even with no match.
if [[ -x "$CERT_SCRIPT" ]]; then
  if CERT_OUT=$("$CERT_SCRIPT"); then
    CERT_BUNDLE=$(printf '%s\n' "$CERT_OUT" | sed -n 's/^CERT_BUNDLE=//p' | tail -n 1)
    if [[ -n "$CERT_BUNDLE" ]]; then
      export SSL_CERT_FILE="$CERT_BUNDLE"
      export REQUESTS_CA_BUNDLE="$CERT_BUNDLE"
      export DDGS_VERIFY=true
    fi
  else
    echo "WARN: update_venv_cert.sh failed; continuing with the default CA bundle" >&2
  fi
fi

# Debug SSL bundle configuration
if [[ -n "${SSL_CERT_FILE:-}" ]]; then
  echo "SSL_CERT_FILE=$SSL_CERT_FILE"
fi
if [[ -n "${REQUESTS_CA_BUNDLE:-}" ]]; then
  echo "REQUESTS_CA_BUNDLE=$REQUESTS_CA_BUNDLE"
fi
if [[ -n "${SSL_CERT_FILE:-}" && -f "$SSL_CERT_FILE" ]]; then
  echo "SSL cert bundle size: $(wc -c < "$SSL_CERT_FILE") bytes"
fi

# Help output: only the exit status matters. No `| head`, which under
# pipefail could turn an early-closed pipe into a spurious failure.
ddgs-search --help >/dev/null

# Simple search (network required). If network/cert errors occur, skip with success.
ERR_FILE="$ROOT_DIR/.ddgs_search_err"
STATUS=0
DDS_OUT=$(ddgs-search "openai" --max-results 1 --format json 2>"$ERR_FILE") || STATUS=$?

if (( STATUS != 0 )); then
  if grep -Eq "CERTIFICATE_VERIFY_FAILED|TLS handshake failed|client error|relative URL without a base" "$ERR_FILE"; then
    echo "SKIP: network/cert error running live search" >&2
    exit 0
  fi
  cat "$ERR_FILE" >&2
  exit "$STATUS"
fi

export DDS_OUT
python - <<'PY'
import json, os
payload = json.loads(os.environ["DDS_OUT"])
assert isinstance(payload, list)
print("OK: received", len(payload), "result(s)")
PY
|
test_scripts/update_venv_cert.sh
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
# Build a CA bundle (certifi + local Zscaler root CA) inside the repo's .venv
# and make the venv's activate script export it.
#
# Requires: certif_zscaler/Zscaler_Root_CA.pem under the repository root.
# Output:   the last stdout line is "CERT_BUNDLE=<path>", which callers parse.
# Exit:     1 when the certificate file is missing.
set -euo pipefail

ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
VENV_DIR="$ROOT_DIR/.venv"
CERT_SRC="$ROOT_DIR/certif_zscaler/Zscaler_Root_CA.pem"
CERT_DIR="$VENV_DIR/certs"
CERT_BUNDLE="$CERT_DIR/ca_bundle.pem"

if [[ ! -f "$CERT_SRC" ]]; then
  echo "Missing certificate: $CERT_SRC" >&2
  exit 1
fi

python3 -m venv "$VENV_DIR"
# shellcheck disable=SC1091
source "$VENV_DIR/bin/activate"

python -m pip install --upgrade pip >/dev/null
python -m pip install certifi >/dev/null

mkdir -p "$CERT_DIR"
CERTIFI_PATH=$(python -c 'import certifi; print(certifi.where())')

cat -- "$CERTIFI_PATH" "$CERT_SRC" > "$CERT_BUNDLE"

# Append the exports to the activate script only once. grep is used instead
# of rg: if rg is not installed, `! rg -q` is always true and the lines would
# be appended again on every run.
ACTIVATE_FILE="$VENV_DIR/bin/activate"
if ! grep -qF -- "SSL_CERT_FILE" "$ACTIVATE_FILE"; then
  {
    echo ""
    echo "# Added by update_venv_cert.sh"
    echo "export SSL_CERT_FILE=\"$CERT_BUNDLE\""
    echo "export REQUESTS_CA_BUNDLE=\"$CERT_BUNDLE\""
  } >> "$ACTIVATE_FILE"
fi

echo "Updated venv cert bundle at: $CERT_BUNDLE"
echo "CERT_BUNDLE=$CERT_BUNDLE"
|