tecuts committed on
Commit
4aab314
·
verified ·
1 Parent(s): 20755e4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +283 -52
app.py CHANGED
@@ -1,9 +1,16 @@
1
  import os
2
  import json
3
  import requests
4
- from fastapi import FastAPI, Request
 
 
5
  from fastapi.middleware.cors import CORSMiddleware
6
  from openai import OpenAI
 
 
 
 
 
7
 
8
  # --- Load API Keys from Environment Variables ---
9
  GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
@@ -11,119 +18,343 @@ GOOGLE_CX = os.getenv("GOOGLE_CX")
11
  LLM_API_KEY = os.getenv("LLM_API_KEY")
12
  LLM_BASE_URL = os.getenv("LLM_BASE_URL", "https://api-15i2e8ze256bvfn6.aistudio-app.com/v1")
13
 
14
- # --- Web Search Tool Implementation ---
15
- def Google_Search_tool(queries: list) -> list:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  if not GOOGLE_API_KEY or not GOOGLE_CX:
17
- print("ERROR: GOOGLE_API_KEY or GOOGLE_CX environment variables not set.")
18
  return []
19
 
20
- query = queries[0]
21
- print(f"Executing Google Custom Search for: '{query}'")
 
 
 
 
22
 
23
  search_url = "https://www.googleapis.com/customsearch/v1"
24
- params = {"key": GOOGLE_API_KEY, "cx": GOOGLE_CX, "q": query, "num": 3}
 
 
 
 
 
 
25
 
26
  try:
27
- response = requests.get(search_url, params=params, timeout=10)
28
  response.raise_for_status()
29
  search_results = response.json()
30
 
31
- # Structure the results for internal use
 
 
 
 
32
  parsed_results = []
33
  for item in search_results.get("items", []):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  parsed_results.append({
35
- "source_title": item.get("title"),
36
- "url": item.get("link"),
37
- "snippet": item.get("snippet")
 
 
38
  })
 
 
39
  return parsed_results
40
 
 
 
 
41
  except requests.exceptions.RequestException as e:
42
- print(f"Error during Google search request: {e}")
 
 
 
43
  return []
44
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  # --- FastAPI Application Setup ---
46
- app = FastAPI()
 
47
  app.add_middleware(
48
  CORSMiddleware,
49
- allow_origins=["*"],
50
  allow_credentials=True,
51
  allow_methods=["*"],
52
  allow_headers=["*"],
53
  )
54
 
55
  # --- OpenAI Client Initialization ---
56
- client = OpenAI(api_key=LLM_API_KEY, base_url=LLM_BASE_URL) if LLM_API_KEY and LLM_BASE_URL else None
 
 
 
 
 
57
 
58
- # --- LLM Tool Definition ---
59
- available_tools = [{"type": "function", "function": {"name": "Google Search", "description": "Performs a Google search for up-to-date information.", "parameters": {"type": "object", "properties": {"query": {"type": "string", "description": "The search query."}}, "required": ["query"]}}}]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
 
61
- # --- Chatbot Endpoint ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  @app.post("/chat")
63
  async def chat_endpoint(request: Request):
64
  if not client:
65
- return {"response": "Error: LLM client not configured.", "sources": []}
66
 
67
  try:
68
  data = await request.json()
69
- user_message = data.get("message")
70
- use_search = data.get("use_search", True)
 
71
 
72
  if not user_message:
73
- return {"response": "Error: No message provided.", "sources": []}
74
 
75
- messages = data.get("history", []) + [{"role": "user", "content": user_message}]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
 
77
- llm_kwargs = {"model": "unsloth/Qwen3-30B-A3B-GGUF", "temperature": 0.6, "messages": messages}
78
  if use_search:
79
- print("INFO: Search is ENABLED.")
80
  llm_kwargs["tools"] = available_tools
81
  llm_kwargs["tool_choice"] = "auto"
82
  else:
83
- print("INFO: Search is DISABLED.")
84
 
 
85
  llm_response = client.chat.completions.create(**llm_kwargs)
86
-
87
  tool_calls = llm_response.choices[0].message.tool_calls
88
- source_links = [] # Initialize source links list
89
 
90
  if tool_calls:
 
91
  tool_outputs = []
 
92
  for tool_call in tool_calls:
93
- if tool_call.function.name == "Google Search":
94
- function_args = json.loads(tool_call.function.arguments)
95
- search_query = function_args.get("query")
96
-
97
- if search_query:
98
- search_results = Google_Search_tool(queries=[search_query])
99
-
100
- formatted_snippets = []
101
- for res in search_results:
102
- # Store title and URL for the final response
103
- source_links.append({"title": res["source_title"], "url": res["url"]})
104
- # Format snippet for the LLM context
105
- formatted_snippets.append(f"Source: {res['source_title']}\nSnippet: {res['snippet']}")
106
 
107
- tool_output_content = "Search Results:\n" + "\n---\n".join(formatted_snippets) if formatted_snippets else "No relevant search results found."
108
- tool_outputs.append({"tool_call_id": tool_call.id, "output": tool_output_content})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
 
 
110
  messages.append(llm_response.choices[0].message)
111
  for output_item in tool_outputs:
112
- messages.append({"role": "tool", "tool_call_id": output_item["tool_call_id"], "content": output_item["output"]})
 
 
 
 
113
 
114
- final_response = client.chat.completions.create(model="unsloth/Qwen3-30B-A3B-GGUF", temperature=0.6, messages=messages)
 
 
 
 
 
 
115
  final_chatbot_response = final_response.choices[0].message.content
116
  else:
117
  final_chatbot_response = llm_response.choices[0].message.content
118
 
119
- # Return the structured response
120
- return {"response": final_chatbot_response, "sources": source_links}
 
 
 
 
 
 
 
 
121
 
 
 
 
 
 
122
  except Exception as e:
123
- print(f"ERROR in /chat: {e}")
124
- return {"response": f"An internal error occurred: {str(e)}", "sources": []}
125
 
126
- # --- Health Check / Root Endpoint ---
127
  @app.get("/")
128
  async def root():
129
- return {"message": "Chatbot FastAPI is running."}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  import json
3
  import requests
4
+ from datetime import datetime
5
+ from typing import List, Dict, Optional
6
+ from fastapi import FastAPI, Request, HTTPException
7
  from fastapi.middleware.cors import CORSMiddleware
8
  from openai import OpenAI
9
+ import logging
10
+
11
+ # --- Configure Logging ---
12
+ logging.basicConfig(level=logging.INFO)
13
+ logger = logging.getLogger(__name__)
14
 
15
  # --- Load API Keys from Environment Variables ---
16
  GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
 
18
  LLM_API_KEY = os.getenv("LLM_API_KEY")
19
  LLM_BASE_URL = os.getenv("LLM_BASE_URL", "https://api-15i2e8ze256bvfn6.aistudio-app.com/v1")
20
 
21
# --- Enhanced System Prompt ---
# System-message template injected at the head of every /chat conversation.
# NOTE: it is filled via str.format() with a {current_date} placeholder in the
# chat endpoint, so any literal braces added to this text must be doubled.
SYSTEM_PROMPT = """You are an intelligent AI assistant with access to real-time web search capabilities. When answering questions:

1. **Search Strategy**: Use web search when you need current information, recent events, or specific facts that may have changed.

2. **Source Integration**: When using search results, synthesize information from multiple sources and clearly indicate when information comes from your search results.

3. **Quality Response Guidelines**:
   - Provide comprehensive, well-structured answers
   - Cite sources when using search results
   - If search results are contradictory, mention the discrepancy
   - Prioritize recent and authoritative sources
   - If search results are insufficient, acknowledge limitations

4. **Response Format**:
   - Start with a direct answer to the user's question
   - Provide supporting details and context
   - Include relevant examples when helpful
   - End with additional insights or related information if relevant

5. **Current Context**: Today's date is {current_date}. Use this for time-sensitive queries.

Remember to be helpful, accurate, and transparent about your information sources."""
44
+
45
# --- Enhanced Web Search Tool Implementation ---
def google_search_tool(queries: List[str], num_results: int = 5) -> List[Dict]:
    """
    Run a Google Custom Search and return a list of parsed result dicts.

    Args:
        queries: Search queries; only the first entry is used.
        num_results: Desired number of results (capped at Google's max of 10).

    Returns:
        A list of dicts with keys "source_title", "url", "snippet",
        "published_date" (may be None) and "domain". Returns [] on missing
        configuration, an empty query, or any request/parsing failure.
    """
    from urllib.parse import urlparse  # stdlib; local import keeps the fix self-contained

    if not GOOGLE_API_KEY or not GOOGLE_CX:
        logger.error("GOOGLE_API_KEY or GOOGLE_CX environment variables not set.")
        return []

    # Guard hardened: also tolerates a None (or otherwise non-string) first
    # element, which previously raised AttributeError on .strip().
    if not queries or not str(queries[0] or "").strip():
        logger.warning("Empty search query provided")
        return []

    query = str(queries[0]).strip()
    logger.info(f"Executing Google Custom Search for: '{query}'")

    search_url = "https://www.googleapis.com/customsearch/v1"
    params = {
        "key": GOOGLE_API_KEY,
        "cx": GOOGLE_CX,
        "q": query,
        "num": min(num_results, 10),  # Google API max is 10
        "dateRestrict": "m6"  # Prioritize results from last 6 months for freshness
    }

    try:
        response = requests.get(search_url, params=params, timeout=15)
        response.raise_for_status()
        search_results = response.json()

        if "items" not in search_results:
            logger.warning(f"No search results found for query: '{query}'")
            return []

        parsed_results = []
        for item in search_results.get("items", []):
            title = item.get("title", "").strip()
            url = item.get("link", "").strip()
            snippet = item.get("snippet", "").strip()

            # Skip results with missing essential information.
            if not title or not url or not snippet:
                continue

            # Extract the publication date from page metadata when present.
            pub_date = None
            if "pagemap" in item and "metatags" in item["pagemap"]:
                for meta in item["pagemap"]["metatags"]:
                    if "article:published_time" in meta:
                        pub_date = meta["article:published_time"]
                        break

            # BUGFIX: url.split('/')[2] raised IndexError for scheme-less
            # URLs ("example.com/page" has no index 2) and the "'/' in url"
            # guard did not prevent it; the IndexError was then swallowed by
            # the broad except below, discarding ALL results. urlparse is
            # robust; fall back to the raw url when no netloc is present.
            netloc = urlparse(url).netloc
            parsed_results.append({
                "source_title": title,
                "url": url,
                "snippet": snippet,
                "published_date": pub_date,
                "domain": netloc if netloc else url
            })

        logger.info(f"Successfully parsed {len(parsed_results)} search results")
        return parsed_results

    except requests.exceptions.Timeout:
        logger.error("Google search request timed out")
        return []
    except requests.exceptions.RequestException as e:
        logger.error(f"Error during Google search request: {e}")
        return []
    except Exception as e:
        logger.error(f"Unexpected error in google_search_tool: {e}")
        return []
118
 
119
def format_search_results_for_llm(search_results: List[Dict]) -> str:
    """
    Render parsed search results as one text block for the LLM context,
    including the retrieval date, per-result metadata, and synthesis guidance.
    """
    if not search_results:
        return "No relevant search results were found for this query."

    today = datetime.now().strftime("%Y-%m-%d")
    sections = [f"Search Results (Retrieved on {today}):\n"]

    for idx, entry in enumerate(search_results, 1):
        detail_lines = [
            f"--- Result {idx} ---",
            f"Title: {entry['source_title']}",
            f"Source: {entry['domain']}",
            f"URL: {entry['url']}",
        ]
        published = entry.get('published_date')
        if published:
            detail_lines.append(f"Published: {published}")
        detail_lines.append(f"Content: {entry['snippet']}")
        # Leading "\n" keeps each section visually separated, matching the
        # original "\n--- Result i ---" formatting exactly.
        sections.append("\n" + "\n".join(detail_lines))

    sections.append("\n--- End of Search Results ---\n")
    sections.append("Please synthesize this information to provide a comprehensive answer to the user's question. If the search results contain conflicting information, please note the discrepancy. Always cite your sources when using information from the search results.")

    return "\n".join(sections)
145
+
146
# --- FastAPI Application Setup ---
app = FastAPI(title="AI Chatbot with Enhanced Search", version="2.0.0")

# Allow cross-origin browser clients to reach the API.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # In production, specify actual origins
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
156
 
157
# --- OpenAI Client Initialization ---
# Build the client only when both credentials are present; otherwise leave it
# as None so the /chat endpoint can report the misconfiguration.
client = None
if LLM_API_KEY and LLM_BASE_URL:
    client = OpenAI(api_key=LLM_API_KEY, base_url=LLM_BASE_URL)
    logger.info("OpenAI client initialized successfully")
else:
    logger.error("LLM_API_KEY or LLM_BASE_URL not configured")
164
 
165
# --- Enhanced Tool Definition ---
# OpenAI-style tool schema advertised to the model when search is enabled.
# The function name "google_search" must match the dispatch check in the
# /chat endpoint's tool-call handling.
available_tools = [
    {
        "type": "function",
        "function": {
            "name": "google_search",
            "description": "Performs a Google search for current information, recent events, specific facts, or when you need to verify or update your knowledge. Use this when the user asks about recent events, current statistics, latest news, or specific factual information that may have changed recently.",
            "parameters": {
                "type": "object",
                "properties": {
                    "query": {
                        "type": "string",
                        "description": "The search query. Be specific and include relevant keywords. For recent events, include time-related terms like 'latest', '2024', 'recent', etc."
                    }
                },
                "required": ["query"]
            }
        }
    }
]
185
 
186
def should_use_search(message: str) -> bool:
    """
    Heuristically decide whether web search should be enabled for a message.

    Returns True when the message contains recency-related keywords or common
    factual-question phrasings, False otherwise. Matching is plain substring
    containment on the lowercased message.

    NOTE(review): substring matching can over-trigger (e.g. "now" matches
    inside "know") — confirm whether word-boundary matching is wanted.
    """
    # The historical hard-coded "2024"/"2025" literals are kept for backward
    # compatibility, but the current and next calendar year are also derived
    # dynamically so the heuristic no longer goes stale as time passes.
    this_year = datetime.now().year
    search_indicators = [
        "latest", "recent", "current", "now", "today", "this year",
        "2024", "2025", str(this_year), str(this_year + 1),
        "news", "update", "what's happening", "status", "price", "stock",
        "weather", "score", "results", "announcement", "release",
    ]

    factual_indicators = [
        "who is", "what is", "where is", "when did", "how many", "statistics",
        "data", "information about", "tell me about", "facts about",
    ]

    message_lower = message.lower()

    # Strong indicators: recency or volatile data (news, prices, scores).
    if any(indicator in message_lower for indicator in search_indicators):
        return True

    # Moderate indicators: factual-lookup phrasings.
    return any(indicator in message_lower for indicator in factual_indicators)
212
+
213
# --- Enhanced Chatbot Endpoint ---
@app.post("/chat")
async def chat_endpoint(request: Request):
    """
    Main chat endpoint.

    Request JSON: {"message": str, "use_search": bool | null, "history": list}.
    When "use_search" is absent/null, search usage is auto-decided from the
    message content. Returns {"response", "sources", "search_used",
    "timestamp"}; raises 400 on bad input and 500 on internal failure.
    """
    if not client:
        raise HTTPException(status_code=500, detail="LLM client not configured")

    try:
        data = await request.json()
        # BUGFIX: data.get("message", "").strip() crashed with a 500 when the
        # client sent {"message": null} — the default only applies when the
        # key is absent. Coerce falsy/None to "" so we return a clean 400.
        user_message = str(data.get("message") or "").strip()
        use_search = data.get("use_search")  # None means auto-decide
        conversation_history = data.get("history", [])

        if not user_message:
            raise HTTPException(status_code=400, detail="No message provided")

        # Auto-decide search usage if not specified by the caller.
        if use_search is None:
            use_search = should_use_search(user_message)
            logger.info(f"Auto-decided search usage: {use_search}")

        # Prepend the system prompt (with today's date) to the conversation.
        current_date = datetime.now().strftime("%Y-%m-%d")
        system_message = {"role": "system", "content": SYSTEM_PROMPT.format(current_date=current_date)}
        messages = [system_message] + conversation_history + [{"role": "user", "content": user_message}]

        llm_kwargs = {
            "model": "unsloth/Qwen3-30B-A3B-GGUF",
            "temperature": 0.7,  # Slightly higher for more creative responses
            "messages": messages,
            "max_tokens": 2000  # Ensure comprehensive responses
        }

        if use_search:
            logger.info("Search is ENABLED")
            llm_kwargs["tools"] = available_tools
            llm_kwargs["tool_choice"] = "auto"
        else:
            logger.info("Search is DISABLED")

        # First LLM call; the model may respond directly or request tools.
        llm_response = client.chat.completions.create(**llm_kwargs)
        tool_calls = llm_response.choices[0].message.tool_calls
        source_links = []

        if tool_calls:
            logger.info(f"Processing {len(tool_calls)} tool calls")
            tool_outputs = []

            for tool_call in tool_calls:
                if tool_call.function.name == "google_search":
                    try:
                        function_args = json.loads(tool_call.function.arguments)
                        # Same null-tolerance fix as for "message" above.
                        search_query = str(function_args.get("query") or "").strip()

                        if search_query:
                            logger.info(f"Executing search for: {search_query}")
                            search_results = google_search_tool([search_query], num_results=5)

                            # Collect source links for the structured response.
                            for result in search_results:
                                source_links.append({
                                    "title": result["source_title"],
                                    "url": result["url"],
                                    "domain": result["domain"]
                                })

                            formatted_results = format_search_results_for_llm(search_results)
                            tool_outputs.append({
                                "tool_call_id": tool_call.id,
                                "output": formatted_results
                            })
                        else:
                            logger.warning("Empty search query in tool call")
                            tool_outputs.append({
                                "tool_call_id": tool_call.id,
                                "output": "Error: Empty search query provided."
                            })

                    except json.JSONDecodeError as e:
                        logger.error(f"Failed to parse tool call arguments: {e}")
                        tool_outputs.append({
                            "tool_call_id": tool_call.id,
                            "output": "Error: Failed to parse search parameters."
                        })
                else:
                    # ROBUSTNESS FIX: previously a tool call with any other
                    # name produced NO tool response, leaving the follow-up
                    # request with dangling tool_calls (an API error).
                    logger.warning(f"Unknown tool requested: {tool_call.function.name}")
                    tool_outputs.append({
                        "tool_call_id": tool_call.id,
                        "output": f"Error: Unknown tool '{tool_call.function.name}'."
                    })

            # Continue the conversation with the tool results attached.
            messages.append(llm_response.choices[0].message)
            for output_item in tool_outputs:
                messages.append({
                    "role": "tool",
                    "tool_call_id": output_item["tool_call_id"],
                    "content": output_item["output"]
                })

            # Final response generation with search context.
            final_response = client.chat.completions.create(
                model="unsloth/Qwen3-30B-A3B-GGUF",
                temperature=0.7,
                messages=messages,
                max_tokens=2000
            )
            final_chatbot_response = final_response.choices[0].message.content
        else:
            final_chatbot_response = llm_response.choices[0].message.content

        response_data = {
            "response": final_chatbot_response,
            "sources": source_links,
            "search_used": bool(tool_calls),
            "timestamp": datetime.now().isoformat()
        }

        logger.info(f"Chat response generated successfully. Search used: {bool(tool_calls)}")
        return response_data

    except HTTPException:
        raise
    except json.JSONDecodeError:
        logger.error("Invalid JSON in request body")
        raise HTTPException(status_code=400, detail="Invalid JSON in request body")
    except Exception as e:
        # logger.exception records the traceback, not just the message.
        logger.exception(f"Unexpected error in /chat endpoint: {e}")
        raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
338
 
339
# --- Root Endpoint ---
@app.get("/")
async def root():
    """Service banner: name, version, feature list, and current timestamp."""
    feature_list = [
        "Google Search Integration",
        "Intelligent Search Decision",
        "Enhanced Prompting",
    ]
    return {
        "message": "Enhanced AI Chatbot API is running",
        "version": "2.0.0",
        "features": feature_list,
        "timestamp": datetime.now().isoformat(),
    }
348
+
349
# --- Health Check Endpoint ---
@app.get("/health")
async def health_check():
    """Report liveness and whether each backing service is configured."""
    service_state = {
        "llm_client": client is not None,
        "google_search": bool(GOOGLE_API_KEY and GOOGLE_CX),
    }
    return {
        "status": "healthy",
        "timestamp": datetime.now().isoformat(),
        "services": service_state,
    }