sae8d committed on
Commit
2d91f26
·
verified ·
1 Parent(s): d7850ea

Upload 36 files

Browse files
api/__pycache__/main.cpython-313.pyc CHANGED
Binary files a/api/__pycache__/main.cpython-313.pyc and b/api/__pycache__/main.cpython-313.pyc differ
 
api/__pycache__/schemas.cpython-313.pyc CHANGED
Binary files a/api/__pycache__/schemas.cpython-313.pyc and b/api/__pycache__/schemas.cpython-313.pyc differ
 
api/__pycache__/search.cpython-313.pyc CHANGED
Binary files a/api/__pycache__/search.cpython-313.pyc and b/api/__pycache__/search.cpython-313.pyc differ
 
api/main.py CHANGED
@@ -1,7 +1,7 @@
1
  import os
2
  from fastapi import FastAPI, HTTPException, Query
3
  from api.schemas import SearchResponse, StatsResponse, DocumentResponse, CrawlRequest
4
- from api.search import rank_search, wildcard_search
5
  from db.supabase_client import get_supabase
6
  from indexer.tasks import celery_app
7
  from typing import Optional
@@ -11,10 +11,13 @@ app = FastAPI(title="Information Retrieval System API")
11
  @app.get("/search", response_model=SearchResponse)
12
  async def search(q: str, k: int = Query(10, gt=0)):
13
  results = rank_search(q, k)
 
14
  return {
15
  "query": q,
16
  "total_results": len(results),
17
- "results": results
 
 
18
  }
19
 
20
  @app.get("/search/wildcard", response_model=SearchResponse)
@@ -25,10 +28,13 @@ async def search_wildcard(q: str, k: int = Query(10, gt=0)):
25
  return await search(q, k)
26
 
27
  results = wildcard_search(q, k)
 
28
  return {
29
  "query": q,
30
  "total_results": len(results),
31
- "results": results
 
 
32
  }
33
 
34
  @app.post("/crawl")
 
1
  import os
2
  from fastapi import FastAPI, HTTPException, Query
3
  from api.schemas import SearchResponse, StatsResponse, DocumentResponse, CrawlRequest
4
+ from api.search import rank_search, wildcard_search, fetch_ddgs_results
5
  from db.supabase_client import get_supabase
6
  from indexer.tasks import celery_app
7
  from typing import Optional
 
11
  @app.get("/search", response_model=SearchResponse)
12
  async def search(q: str, k: int = Query(10, gt=0)):
13
  results = rank_search(q, k)
14
+ ddgs_results, ddgs_images = fetch_ddgs_results(q, 5)
15
  return {
16
  "query": q,
17
  "total_results": len(results),
18
+ "results": results,
19
+ "ddgs_results": ddgs_results,
20
+ "ddgs_images": ddgs_images
21
  }
22
 
23
  @app.get("/search/wildcard", response_model=SearchResponse)
 
28
  return await search(q, k)
29
 
30
  results = wildcard_search(q, k)
31
+ ddgs_results, ddgs_images = fetch_ddgs_results(q, 5)
32
  return {
33
  "query": q,
34
  "total_results": len(results),
35
+ "results": results,
36
+ "ddgs_results": ddgs_results,
37
+ "ddgs_images": ddgs_images
38
  }
39
 
40
  @app.post("/crawl")
api/schemas.py CHANGED
@@ -9,10 +9,23 @@ class SearchResult(BaseModel):
9
  image_url: Optional[str]
10
  score: float
11
 
 
 
 
 
 
 
 
 
 
 
 
12
  class SearchResponse(BaseModel):
13
  query: str
14
  total_results: int
15
  results: List[SearchResult]
 
 
16
 
17
  class CrawlRequest(BaseModel):
18
  url: Optional[str] = None
 
9
  image_url: Optional[str]
10
  score: float
11
 
12
+ class DDGSResult(BaseModel):
13
+ title: str
14
+ href: str
15
+ body: str
16
+
17
+ class DDGSImage(BaseModel):
18
+ title: str
19
+ image: str
20
+ thumbnail: str
21
+ url: str
22
+
23
  class SearchResponse(BaseModel):
24
  query: str
25
  total_results: int
26
  results: List[SearchResult]
27
+ ddgs_results: Optional[List[DDGSResult]] = []
28
+ ddgs_images: Optional[List[DDGSImage]] = []
29
 
30
  class CrawlRequest(BaseModel):
31
  url: Optional[str] = None
api/search.py CHANGED
@@ -1,5 +1,25 @@
1
  from db.supabase_client import get_supabase
2
  from indexer.preprocess import preprocess
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
  def rank_search(query: str, k: int = 10):
5
  supabase = get_supabase()
 
1
  from db.supabase_client import get_supabase
2
  from indexer.preprocess import preprocess
3
+ from ddgs import DDGS
4
+
5
+ def fetch_ddgs_results(query: str, max_results: int = 5):
6
+ """Fetch results and images from DuckDuckGo using ddgs."""
7
+ results = []
8
+ images = []
9
+
10
+ try:
11
+ with DDGS() as ddgs:
12
+ # Fetch text results
13
+ ddgs_gen = ddgs.text(query, max_results=max_results, safesearch='on')
14
+ results = list(ddgs_gen)
15
+
16
+ # Fetch image results
17
+ ddgs_images_gen = ddgs.images(query, max_results=max_results, safesearch='on')
18
+ images = list(ddgs_images_gen)
19
+ except Exception as e:
20
+ print(f"Error fetching DDGS results: {e}")
21
+
22
+ return results, images
23
 
24
  def rank_search(query: str, k: int = 10):
25
  supabase = get_supabase()
requirements.txt CHANGED
@@ -1,14 +1,15 @@
1
- scrapy
2
- beautifulsoup4
3
- lxml
4
- langdetect
5
- fastapi
6
- uvicorn
7
- supabase
8
- nltk
9
- PyStemmer
10
- celery[redis]
11
- python-dotenv
12
- httpx
13
- pydantic-settings
14
- pydantic
 
 
1
+ scrapy
2
+ beautifulsoup4
3
+ lxml
4
+ langdetect
5
+ fastapi
6
+ uvicorn
7
+ supabase
8
+ nltk
9
+ PyStemmer
10
+ celery[redis]
11
+ python-dotenv
12
+ httpx
13
+ pydantic-settings
14
+ pydantic
15
+ ddgs