Husnain Rasheed committed on
Commit
368667f
·
verified ·
1 Parent(s): 9adc439

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +93 -73
main.py CHANGED
@@ -1,84 +1,104 @@
 
 
1
  import httpx
2
- import json
3
  from fastapi import FastAPI, HTTPException, Query
4
- from pydantic import BaseModel, Field
5
- from typing import Optional, List, Dict, Any
6
 
7
- # Initialize the FastAPI app
 
 
8
  app = FastAPI(
9
- title="FastAPI DuckDuckGo Instant Answer API Proxy",
10
- description="A proxy for the DuckDuckGo Instant Answer API. This API provides direct answers and summaries, but not a list of general web search results.",
11
- version="1.1.0",
 
 
 
12
  )
13
 
14
- # Define Pydantic models for a cleaner response structure
15
- class SearchResult(BaseModel):
16
- heading: Optional[str] = Field(None, description="The title of the answer.")
17
- answer: Optional[str] = Field(None, description="A direct answer to the query.")
18
- abstract: Optional[str] = Field(None, description="A summary or abstract of the topic.")
19
- abstract_url: Optional[str] = Field(None, description="The URL for the abstract source.")
20
- image_url: Optional[str] = Field(None, description="A relevant image URL.")
21
- related_topics: List[Dict[str, Any]] = Field([], description="A list of related topics.")
22
- raw_response: Dict[str, Any] = Field({}, description="The full, raw JSON response from the API.")
23
-
24
- # Define the base URL for the DuckDuckGo Instant Answer API
25
- DUCKDUCKGO_API_URL = "https://api.duckduckgo.com/"
26
-
27
- @app.get("/search",
28
- tags=["Search"],
29
- response_model=SearchResult,
30
- summary="Get an Instant Answer from DuckDuckGo")
31
- async def search_duckduckgo(
32
- q: str = Query(..., description="The search query. Try 'what is the capital of France' or 'Google' to see results."),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  ):
34
- """
35
- Performs a search using the DuckDuckGo Instant Answer API.
36
-
37
- This endpoint returns structured information for queries that have a direct answer.
38
- It will return an empty response for general web search queries that do not trigger an Instant Answer.
39
- """
40
- params = {
41
- "q": q,
42
- "format": "json",
43
- "no_html": 1,
 
 
 
 
 
 
44
  }
45
 
46
- async with httpx.AsyncClient() as client:
47
- try:
48
- response = await client.get(DUCKDUCKGO_API_URL, params=params)
49
  response.raise_for_status()
 
 
 
 
50
 
51
- if not response.text:
52
- return SearchResult(raw_response={"message": "No results found."})
53
-
54
- data = response.json()
55
-
56
- # Check if a meaningful answer is present.
57
- # The 'Type' field is 'A' for Article, 'D' for Disambiguation, 'C' for category. Empty for no result.
58
- if data.get("Type") or data.get("AbstractText"):
59
- return SearchResult(
60
- heading=data.get("Heading"),
61
- answer=data.get("Answer"),
62
- abstract=data.get("AbstractText"),
63
- abstract_url=data.get("AbstractURL"),
64
- image_url=f'https://duckduckgo.com{data.get("Image")}' if data.get("Image") else None,
65
- related_topics=data.get("RelatedTopics", []),
66
- raw_response=data
67
- )
68
- else:
69
- # Return an empty but valid SearchResult if no instant answer was found
70
- return SearchResult(
71
- raw_response=data,
72
- related_topics=[{"message": "No direct Instant Answer found for this query."}]
73
- )
74
-
75
- except json.JSONDecodeError:
76
- raise HTTPException(status_code=500, detail="Failed to decode JSON from DuckDuckGo API.")
77
- except httpx.HTTPStatusError as e:
78
- raise HTTPException(status_code=e.response.status_code, detail=f"Error from DuckDuckGo API: {e.response.text}")
79
- except httpx.RequestError as e:
80
- raise HTTPException(status_code=500, detail=f"Failed to connect to DuckDuckGo API: {str(e)}")
81
-
82
- @app.get("/", tags=["Root"])
83
- async def read_root():
84
- return {"message": "Welcome to the DuckDuckGo Instant Answer API proxy!"}
 
1
+ from typing import List, Optional
2
+
3
  import httpx
4
+ from bs4 import BeautifulSoup
5
  from fastapi import FastAPI, HTTPException, Query
 
 
6
 
7
# DuckDuckGo Lite endpoint that every incoming search is forwarded to.
UPSTREAM_URL = "https://lite.duckduckgo.com/lite/"
# Value sent as the `kl` query parameter when the caller does not provide one.
DEFAULT_KL = "wt-wt"

# Application object; the route handlers in this module register on it.
app = FastAPI(
    title="DuckDuckGo Lite Web Search Proxy",
    description=(
        "A tiny FastAPI wrapper that proxies search queries to DuckDuckGo Lite "
        "and returns structured JSON results."
    ),
    version="1.0.0",
)
18
 
19
+
20
def _extract_results(html: str) -> List[dict]:
    """Parse DuckDuckGo Lite HTML into a list of search results.

    Two layouts are recognised, tried in order:

    1. One ``table.result`` per hit, containing a ``td.result-link a`` anchor
       and an optional ``td.result-snippet`` cell.
    2. Fallback, used only when layout 1 yields nothing: bare
       ``a.result-link`` anchors. The snippet is looked up in the table row
       that follows the anchor's row; if no such cell exists, the element
       following the anchor's parent is used instead.

    Args:
        html: Raw HTML body returned by the upstream search page.

    Returns:
        A list of dicts with the keys ``"title"``, ``"url"`` and ``"snippet"``.
        ``"snippet"`` is ``None`` when no snippet element was found. Anchors
        without an ``href`` are skipped, as are (in the fallback layout)
        anchors without text.
    """
    soup = BeautifulSoup(html, "html.parser")
    results: List[dict] = []

    def _snippet_text(tag) -> Optional[str]:
        # Collapse the element to plain text; None signals "no snippet found".
        return tag.get_text(" ", strip=True) if tag is not None else None

    for table in soup.select("table.result"):
        link_tag = table.select_one("td.result-link a")
        if link_tag is None:
            continue
        url = link_tag.get("href")
        if not url:
            continue
        results.append({
            "title": link_tag.get_text(strip=True),
            "url": url,
            "snippet": _snippet_text(table.select_one("td.result-snippet")),
        })

    if results:
        return results

    # Fallback: the per-result tables were not present, so walk the anchors.
    for link_tag in soup.select("a.result-link"):
        title = link_tag.get_text(strip=True)
        url = link_tag.get("href")
        if not title or not url:
            continue

        # When results are laid out as table rows, the snippet cell lives in
        # the row *after* the anchor's row, not next to the anchor's own cell.
        snippet_tag = None
        row = link_tag.find_parent("tr")
        next_row = row.find_next_sibling("tr") if row is not None else None
        if next_row is not None:
            snippet_tag = next_row.select_one("td.result-snippet")

        # Last resort (previous behaviour): whatever element follows the
        # anchor's immediate parent.
        if snippet_tag is None:
            parent = link_tag.find_parent()
            snippet_tag = parent.find_next_sibling() if parent is not None else None

        results.append({
            "title": title,
            "url": url,
            "snippet": _snippet_text(snippet_tag),
        })

    return results
60
+
61
+
62
@app.post("/lite/")
async def search_duckduckgo_lite(
    q: str = Query(..., description="keywords for query", min_length=1),
    s: Optional[int] = Query(None, description="can be `0`"),
    o: Optional[str] = Query(None, description="can be `json`"),
    api: Optional[str] = Query(None, description="can be `d.js`"),
    kl: Optional[str] = Query(None, description="market/locale code"),
    bing_market: Optional[str] = Query(None, description="market/locale code"),
):
    # Forwards the query to UPSTREAM_URL with a GET request, parses the HTML
    # that comes back with _extract_results, and returns the hits as JSON.
    # Any httpx.HTTPError (transport failure, timeout or non-2xx status) is
    # reported to the caller as a 502.
    #
    # Documented with comments rather than a docstring on purpose: FastAPI
    # would publish a docstring as the operation description in the schema.

    # Optional parameters in the order they are sent upstream; None means
    # "leave it out". `s` is kept when it is 0, and `kl` always has a value.
    optional_params = {
        "s": None if s is None else str(s),
        "o": o or None,
        "api": api or None,
        "kl": kl or DEFAULT_KL,
        "bing_market": bing_market or None,
    }
    params = {"q": q}
    params.update(
        (name, value) for name, value in optional_params.items() if value is not None
    )

    # Browser-like User-Agent sent with the upstream request.
    user_agent = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/116.0.0.0 Safari/537.36"
    )
    headers = {"User-Agent": user_agent}

    try:
        async with httpx.AsyncClient(timeout=httpx.Timeout(10.0)) as client:
            upstream = await client.get(UPSTREAM_URL, params=params, headers=headers)
            upstream.raise_for_status()
    except httpx.HTTPError as exc:
        raise HTTPException(status_code=502, detail="Upstream search failed") from exc

    hits = _extract_results(upstream.text)

    return {
        "query": q,
        "kl": params.get("kl"),
        "bing_market": params.get("bing_market"),
        "count": len(hits),
        "results": hits,
    }