Spaces:
Running
Running
Upload 36 files
Browse files- api/__pycache__/main.cpython-313.pyc +0 -0
- api/__pycache__/schemas.cpython-313.pyc +0 -0
- api/__pycache__/search.cpython-313.pyc +0 -0
- api/main.py +9 -3
- api/schemas.py +13 -0
- api/search.py +20 -0
- requirements.txt +15 -14
api/__pycache__/main.cpython-313.pyc
CHANGED
|
Binary files a/api/__pycache__/main.cpython-313.pyc and b/api/__pycache__/main.cpython-313.pyc differ
|
|
|
api/__pycache__/schemas.cpython-313.pyc
CHANGED
|
Binary files a/api/__pycache__/schemas.cpython-313.pyc and b/api/__pycache__/schemas.cpython-313.pyc differ
|
|
|
api/__pycache__/search.cpython-313.pyc
CHANGED
|
Binary files a/api/__pycache__/search.cpython-313.pyc and b/api/__pycache__/search.cpython-313.pyc differ
|
|
|
api/main.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
import os
|
| 2 |
from fastapi import FastAPI, HTTPException, Query
|
| 3 |
from api.schemas import SearchResponse, StatsResponse, DocumentResponse, CrawlRequest
|
| 4 |
-
from api.search import rank_search, wildcard_search
|
| 5 |
from db.supabase_client import get_supabase
|
| 6 |
from indexer.tasks import celery_app
|
| 7 |
from typing import Optional
|
|
@@ -11,10 +11,13 @@ app = FastAPI(title="Information Retrieval System API")
|
|
| 11 |
@app.get("/search", response_model=SearchResponse)
|
| 12 |
async def search(q: str, k: int = Query(10, gt=0)):
|
| 13 |
results = rank_search(q, k)
|
|
|
|
| 14 |
return {
|
| 15 |
"query": q,
|
| 16 |
"total_results": len(results),
|
| 17 |
-
"results": results
|
|
|
|
|
|
|
| 18 |
}
|
| 19 |
|
| 20 |
@app.get("/search/wildcard", response_model=SearchResponse)
|
|
@@ -25,10 +28,13 @@ async def search_wildcard(q: str, k: int = Query(10, gt=0)):
|
|
| 25 |
return await search(q, k)
|
| 26 |
|
| 27 |
results = wildcard_search(q, k)
|
|
|
|
| 28 |
return {
|
| 29 |
"query": q,
|
| 30 |
"total_results": len(results),
|
| 31 |
-
"results": results
|
|
|
|
|
|
|
| 32 |
}
|
| 33 |
|
| 34 |
@app.post("/crawl")
|
|
|
|
| 1 |
import os
|
| 2 |
from fastapi import FastAPI, HTTPException, Query
|
| 3 |
from api.schemas import SearchResponse, StatsResponse, DocumentResponse, CrawlRequest
|
| 4 |
+
from api.search import rank_search, wildcard_search, fetch_ddgs_results
|
| 5 |
from db.supabase_client import get_supabase
|
| 6 |
from indexer.tasks import celery_app
|
| 7 |
from typing import Optional
|
|
|
|
| 11 |
@app.get("/search", response_model=SearchResponse)
|
| 12 |
async def search(q: str, k: int = Query(10, gt=0)):
|
| 13 |
results = rank_search(q, k)
|
| 14 |
+
ddgs_results, ddgs_images = fetch_ddgs_results(q, 5)
|
| 15 |
return {
|
| 16 |
"query": q,
|
| 17 |
"total_results": len(results),
|
| 18 |
+
"results": results,
|
| 19 |
+
"ddgs_results": ddgs_results,
|
| 20 |
+
"ddgs_images": ddgs_images
|
| 21 |
}
|
| 22 |
|
| 23 |
@app.get("/search/wildcard", response_model=SearchResponse)
|
|
|
|
| 28 |
return await search(q, k)
|
| 29 |
|
| 30 |
results = wildcard_search(q, k)
|
| 31 |
+
ddgs_results, ddgs_images = fetch_ddgs_results(q, 5)
|
| 32 |
return {
|
| 33 |
"query": q,
|
| 34 |
"total_results": len(results),
|
| 35 |
+
"results": results,
|
| 36 |
+
"ddgs_results": ddgs_results,
|
| 37 |
+
"ddgs_images": ddgs_images
|
| 38 |
}
|
| 39 |
|
| 40 |
@app.post("/crawl")
|
api/schemas.py
CHANGED
|
@@ -9,10 +9,23 @@ class SearchResult(BaseModel):
|
|
| 9 |
image_url: Optional[str]
|
| 10 |
score: float
|
| 11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
class SearchResponse(BaseModel):
|
| 13 |
query: str
|
| 14 |
total_results: int
|
| 15 |
results: List[SearchResult]
|
|
|
|
|
|
|
| 16 |
|
| 17 |
class CrawlRequest(BaseModel):
|
| 18 |
url: Optional[str] = None
|
|
|
|
| 9 |
image_url: Optional[str]
|
| 10 |
score: float
|
| 11 |
|
| 12 |
+
class DDGSResult(BaseModel):
|
| 13 |
+
title: str
|
| 14 |
+
href: str
|
| 15 |
+
body: str
|
| 16 |
+
|
| 17 |
+
class DDGSImage(BaseModel):
|
| 18 |
+
title: str
|
| 19 |
+
image: str
|
| 20 |
+
thumbnail: str
|
| 21 |
+
url: str
|
| 22 |
+
|
| 23 |
class SearchResponse(BaseModel):
|
| 24 |
query: str
|
| 25 |
total_results: int
|
| 26 |
results: List[SearchResult]
|
| 27 |
+
ddgs_results: Optional[List[DDGSResult]] = []
|
| 28 |
+
ddgs_images: Optional[List[DDGSImage]] = []
|
| 29 |
|
| 30 |
class CrawlRequest(BaseModel):
|
| 31 |
url: Optional[str] = None
|
api/search.py
CHANGED
|
@@ -1,5 +1,25 @@
|
|
| 1 |
from db.supabase_client import get_supabase
|
| 2 |
from indexer.preprocess import preprocess
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
|
| 4 |
def rank_search(query: str, k: int = 10):
|
| 5 |
supabase = get_supabase()
|
|
|
|
| 1 |
from db.supabase_client import get_supabase
|
| 2 |
from indexer.preprocess import preprocess
|
| 3 |
+
from ddgs import DDGS
|
| 4 |
+
|
| 5 |
+
def fetch_ddgs_results(query: str, max_results: int = 5):
|
| 6 |
+
"""Fetch results and images from DuckDuckGo using ddgs."""
|
| 7 |
+
results = []
|
| 8 |
+
images = []
|
| 9 |
+
|
| 10 |
+
try:
|
| 11 |
+
with DDGS() as ddgs:
|
| 12 |
+
# Fetch text results
|
| 13 |
+
ddgs_gen = ddgs.text(query, max_results=max_results, safesearch='on')
|
| 14 |
+
results = list(ddgs_gen)
|
| 15 |
+
|
| 16 |
+
# Fetch image results
|
| 17 |
+
ddgs_images_gen = ddgs.images(query, max_results=max_results, safesearch='on')
|
| 18 |
+
images = list(ddgs_images_gen)
|
| 19 |
+
except Exception as e:
|
| 20 |
+
print(f"Error fetching DDGS results: {e}")
|
| 21 |
+
|
| 22 |
+
return results, images
|
| 23 |
|
| 24 |
def rank_search(query: str, k: int = 10):
|
| 25 |
supabase = get_supabase()
|
requirements.txt
CHANGED
|
@@ -1,14 +1,15 @@
|
|
| 1 |
-
scrapy
|
| 2 |
-
beautifulsoup4
|
| 3 |
-
lxml
|
| 4 |
-
langdetect
|
| 5 |
-
fastapi
|
| 6 |
-
uvicorn
|
| 7 |
-
supabase
|
| 8 |
-
nltk
|
| 9 |
-
PyStemmer
|
| 10 |
-
celery[redis]
|
| 11 |
-
python-dotenv
|
| 12 |
-
httpx
|
| 13 |
-
pydantic-settings
|
| 14 |
-
pydantic
|
|
|
|
|
|
| 1 |
+
scrapy
|
| 2 |
+
beautifulsoup4
|
| 3 |
+
lxml
|
| 4 |
+
langdetect
|
| 5 |
+
fastapi
|
| 6 |
+
uvicorn
|
| 7 |
+
supabase
|
| 8 |
+
nltk
|
| 9 |
+
PyStemmer
|
| 10 |
+
celery[redis]
|
| 11 |
+
python-dotenv
|
| 12 |
+
httpx
|
| 13 |
+
pydantic-settings
|
| 14 |
+
pydantic
|
| 15 |
+
ddgs
|