PraneshJs committed
Commit f8c8e08 · verified · 1 Parent(s): fe7e039

Update app.py

Files changed (1)
app.py +206 -206
app.py CHANGED
@@ -1,207 +1,207 @@
-import os
-import asyncio
-import requests
-import urllib3
-from openai import AzureOpenAI
-from dotenv import load_dotenv
-from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig
-from crawl4ai.content_filter_strategy import PruningContentFilter
-from crawl4ai.markdown_generation_strategy import DefaultMarkdownGenerator
-from fastapi import FastAPI, HTTPException
-from fastapi.staticfiles import StaticFiles
-from fastapi.middleware.cors import CORSMiddleware
-from pydantic import BaseModel
-import uvicorn
-import json
-
-# Disable SSL warnings
-urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-
-load_dotenv()
-
-# Initialize FastAPI app
-app = FastAPI(title="Search Assistant API")
-
-# Add CORS middleware
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"],  # In production, replace with specific origins
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
-)
-
-# Mount static files
-# app.mount("/static", StaticFiles(directory="static"), name="static")
-
-# Initialize Azure OpenAI client
-client = AzureOpenAI(
-    api_key=os.getenv("AZURE_OPENAI_KEY"),
-    api_version="2025-01-01-preview",
-    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT")
-)
-
-SERPER_API_KEY = os.getenv("SERPER_API_KEY")
-DEPLOYMENT_NAME = os.getenv("AZURE_OPENAI_DEPLOYMENT")
-
-class SearchRequest(BaseModel):
-    question: str
-    mode: str = "quick"  # "quick" or "deep"
-
-class SearchResponse(BaseModel):
-    answer: str
-    sources: list
-    mode: str
-    status: str = "success"
-
-def search_serper(query):
-    headers = {
-        "X-API-KEY": SERPER_API_KEY,
-        "Content-Type": "application/json"
-    }
-    payload = {"q": query}
-    response = requests.post("https://google.serper.dev/search", headers=headers, json=payload, verify=False)
-    results = response.json()
-
-    # Return both snippets and URLs for crawling
-    search_results = []
-    for result in results.get("organic", [])[:3]:  # Limit to top 3 for crawling
-        title = result.get("title", "")
-        snippet = result.get("snippet", "")
-        url = result.get("link", "")
-        search_results.append({
-            "title": title,
-            "snippet": snippet,
-            "url": url
-        })
-
-    return search_results
-
-async def crawl_to_markdown(url: str) -> str:
-    """Crawl a URL and return its content as markdown."""
-    try:
-        browser_conf = BrowserConfig(headless=True, verbose=False)
-        filter_strategy = PruningContentFilter()
-        md_gen = DefaultMarkdownGenerator(content_filter=filter_strategy)
-        run_conf = CrawlerRunConfig(markdown_generator=md_gen)
-
-        async with AsyncWebCrawler(config=browser_conf) as crawler:
-            result = await crawler.arun(url=url, config=run_conf)
-            return result.markdown.fit_markdown or result.markdown.raw_markdown or ""
-    except Exception as e:
-        return f"Crawl error for {url}: {str(e)}"
-
-async def generate_answer_with_crawling(question):
-    """Generate answer using search results and crawled content."""
-    try:
-        # 1. Get search results
-        search_results = search_serper(question)
-
-        # 2. Crawl each URL to get full content
-        crawled_content = []
-        for result in search_results:
-            url = result["url"]
-            title = result["title"]
-
-            print(f"Crawling: {title} ({url})")
-            markdown_content = await crawl_to_markdown(url)
-
-            # Limit content to avoid token limits
-            content_snippet = markdown_content[:2000] if markdown_content else result["snippet"]
-            crawled_content.append(f"## {title}\nSource: {url}\n\n{content_snippet}\n\n")
-
-        # 3. Combine all content for context
-        full_context = "\n".join(crawled_content)
-
-        messages = [
-            {"role": "system", "content": "You are a helpful assistant that answers questions using detailed web content. Provide citations with URLs when possible."},
-            {"role": "user", "content": f"Based on the following web content, answer the question. Include relevant citations.\n\nContent:\n{full_context}\n\nQuestion: {question}"}
-        ]
-
-        response = client.chat.completions.create(
-            model=DEPLOYMENT_NAME,
-            messages=messages,
-            temperature=0.8,
-            max_tokens=800
-        )
-        return response.choices[0].message.content, search_results
-
-    except Exception as e:
-        return f"Error: {str(e)}", []
-
-def generate_answer(question):
-    """Original function using just search snippets."""
-    search_results = search_serper(question)
-
-    snippets = []
-    for result in search_results:
-        title = result["title"]
-        snippet = result["snippet"]
-        url = result["url"]
-        snippets.append(f"{title}: {snippet} ({url})")
-
-    context = "\n".join(snippets)
-    messages = [
-        {"role": "system", "content": "You are a helpful assistant that answers using real-time search context."},
-        {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {question}"}
-    ]
-    response = client.chat.completions.create(
-        model=DEPLOYMENT_NAME,
-        messages=messages,
-        temperature=0.8,
-        max_tokens=800
-    )
-    return response.choices[0].message.content
-
-# API Endpoints
-@app.get("/")
-async def root():
-    return {"status": "ok"}
-
-
-@app.post("/search")
-async def search_endpoint(request: SearchRequest):
-    """Search endpoint that returns JSON response."""
-    try:
-        print(f"\n🔍 Search Request:")
-        print(f"Question: {request.question}")
-        print(f"Mode: {request.mode}")
-
-        if request.mode == "deep":
-            print("🕷️ Starting deep search with web crawling...")
-            answer, sources = await generate_answer_with_crawling(request.question)
-        else:
-            print("⚡ Starting quick search...")
-            answer = generate_answer(request.question)
-            sources = search_serper(request.question)
-
-        response_data = {
-            "answer": answer,
-            "sources": sources,
-            "mode": request.mode,
-            "status": "success"
-        }
-
-        print(f"\n📋 Response Data:")
-        print(json.dumps(response_data, indent=2))
-
-        return response_data
-
-    except Exception as e:
-        error_response = {
-            "answer": f"Error: {str(e)}",
-            "sources": [],
-            "mode": request.mode,
-            "status": "error"
-        }
-
-        print(f"\n❌ Error Response:")
-        print(json.dumps(error_response, indent=2))
-
-        raise HTTPException(status_code=500, detail=error_response)
-
-if __name__ == "__main__":
-    port = int(os.getenv("PORT", 5000))
-    print("🚀 Starting Search Assistant Server...")
-    print(f"📱 Port: {port}")
     uvicorn.run(app, host="0.0.0.0", port=port)
 
+import os
+import asyncio
+import requests
+import urllib3
+from openai import AzureOpenAI
+from dotenv import load_dotenv
+from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig
+from crawl4ai.content_filter_strategy import PruningContentFilter
+from crawl4ai.markdown_generation_strategy import DefaultMarkdownGenerator
+from fastapi import FastAPI, HTTPException
+from fastapi.staticfiles import StaticFiles
+from fastapi.middleware.cors import CORSMiddleware
+from pydantic import BaseModel
+import uvicorn
+import json
+
+# Disable SSL warnings
+urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+
+load_dotenv()
+
+# Initialize FastAPI app
+app = FastAPI(title="Search Assistant API")
+
+# Add CORS middleware
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],  # In production, replace with specific origins
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+# Mount static files
+# app.mount("/static", StaticFiles(directory="static"), name="static")
+
+# Initialize Azure OpenAI client
+client = AzureOpenAI(
+    api_key=os.getenv("AZURE_OPENAI_KEY").strip(),
+    api_version="2025-01-01-preview",
+    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT").strip()
+)
+
+SERPER_API_KEY = os.getenv("SERPER_API_KEY").strip()
+DEPLOYMENT_NAME = os.getenv("AZURE_OPENAI_DEPLOYMENT").strip()
+
+class SearchRequest(BaseModel):
+    question: str
+    mode: str = "quick"  # "quick" or "deep"
+
+class SearchResponse(BaseModel):
+    answer: str
+    sources: list
+    mode: str
+    status: str = "success"
+
+def search_serper(query):
+    headers = {
+        "X-API-KEY": SERPER_API_KEY,
+        "Content-Type": "application/json"
+    }
+    payload = {"q": query}
+    response = requests.post("https://google.serper.dev/search", headers=headers, json=payload, verify=False)
+    results = response.json()
+
+    # Return both snippets and URLs for crawling
+    search_results = []
+    for result in results.get("organic", [])[:3]:  # Limit to top 3 for crawling
+        title = result.get("title", "")
+        snippet = result.get("snippet", "")
+        url = result.get("link", "")
+        search_results.append({
+            "title": title,
+            "snippet": snippet,
+            "url": url
+        })
+
+    return search_results
+
+async def crawl_to_markdown(url: str) -> str:
+    """Crawl a URL and return its content as markdown."""
+    try:
+        browser_conf = BrowserConfig(headless=True, verbose=False)
+        filter_strategy = PruningContentFilter()
+        md_gen = DefaultMarkdownGenerator(content_filter=filter_strategy)
+        run_conf = CrawlerRunConfig(markdown_generator=md_gen)
+
+        async with AsyncWebCrawler(config=browser_conf) as crawler:
+            result = await crawler.arun(url=url, config=run_conf)
+            return result.markdown.fit_markdown or result.markdown.raw_markdown or ""
+    except Exception as e:
+        return f"Crawl error for {url}: {str(e)}"
+
+async def generate_answer_with_crawling(question):
+    """Generate answer using search results and crawled content."""
+    try:
+        # 1. Get search results
+        search_results = search_serper(question)
+
+        # 2. Crawl each URL to get full content
+        crawled_content = []
+        for result in search_results:
+            url = result["url"]
+            title = result["title"]
+
+            print(f"Crawling: {title} ({url})")
+            markdown_content = await crawl_to_markdown(url)
+
+            # Limit content to avoid token limits
+            content_snippet = markdown_content[:2000] if markdown_content else result["snippet"]
+            crawled_content.append(f"## {title}\nSource: {url}\n\n{content_snippet}\n\n")
+
+        # 3. Combine all content for context
+        full_context = "\n".join(crawled_content)
+
+        messages = [
+            {"role": "system", "content": "You are a helpful assistant that answers questions using detailed web content. Provide citations with URLs when possible."},
+            {"role": "user", "content": f"Based on the following web content, answer the question. Include relevant citations.\n\nContent:\n{full_context}\n\nQuestion: {question}"}
+        ]
+
+        response = client.chat.completions.create(
+            model=DEPLOYMENT_NAME,
+            messages=messages,
+            temperature=0.8,
+            max_tokens=800
+        )
+        return response.choices[0].message.content, search_results
+
+    except Exception as e:
+        return f"Error: {str(e)}", []
+
+def generate_answer(question):
+    """Original function using just search snippets."""
+    search_results = search_serper(question)
+
+    snippets = []
+    for result in search_results:
+        title = result["title"]
+        snippet = result["snippet"]
+        url = result["url"]
+        snippets.append(f"{title}: {snippet} ({url})")
+
+    context = "\n".join(snippets)
+    messages = [
+        {"role": "system", "content": "You are a helpful assistant that answers using real-time search context."},
+        {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {question}"}
+    ]
+    response = client.chat.completions.create(
+        model=DEPLOYMENT_NAME,
+        messages=messages,
+        temperature=0.8,
+        max_tokens=800
+    )
+    return response.choices[0].message.content
+
+# API Endpoints
+@app.get("/")
+async def root():
+    return {"status": "ok"}
+
+
+@app.post("/search")
+async def search_endpoint(request: SearchRequest):
+    """Search endpoint that returns JSON response."""
+    try:
+        print(f"\n🔍 Search Request:")
+        print(f"Question: {request.question}")
+        print(f"Mode: {request.mode}")
+
+        if request.mode == "deep":
+            print("🕷️ Starting deep search with web crawling...")
+            answer, sources = await generate_answer_with_crawling(request.question)
+        else:
+            print("⚡ Starting quick search...")
+            answer = generate_answer(request.question)
+            sources = search_serper(request.question)
+
+        response_data = {
+            "answer": answer,
+            "sources": sources,
+            "mode": request.mode,
+            "status": "success"
+        }
+
+        print(f"\n📋 Response Data:")
+        print(json.dumps(response_data, indent=2))
+
+        return response_data
+
+    except Exception as e:
+        error_response = {
+            "answer": f"Error: {str(e)}",
+            "sources": [],
+            "mode": request.mode,
+            "status": "error"
+        }
+
+        print(f"\n❌ Error Response:")
+        print(json.dumps(error_response, indent=2))
+
+        raise HTTPException(status_code=500, detail=error_response)
+
+if __name__ == "__main__":
+    port = int(os.getenv("PORT", 5000))
+    print("🚀 Starting Search Assistant Server...")
+    print(f"📱 Port: {port}")
     uvicorn.run(app, host="0.0.0.0", port=port)
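
For reference, a minimal client sketch for the /search endpoint this file defines, assuming the server is running locally on the default port 5000 and the Azure OpenAI and Serper environment variables are set; the question text is only an illustration.

import requests

# Call the Search Assistant API (assumed to be running at localhost:5000).
resp = requests.post(
    "http://localhost:5000/search",
    json={"question": "What is Crawl4AI?", "mode": "quick"},  # mode can be "quick" or "deep"
    timeout=120,
)
resp.raise_for_status()
data = resp.json()
print(data["answer"])
for source in data["sources"]:
    print(f'- {source["title"]}: {source["url"]}')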