bestroi committed on
Commit
a7dab22
·
verified ·
1 Parent(s): 14715bc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +628 -135
app.py CHANGED
@@ -3,8 +3,15 @@ import pandas as pd
3
  import os
4
  import re
5
  import html
 
6
  from pathlib import Path
7
 
 
 
 
 
 
 
8
  # Function to load all CSV files from the current directory
9
  def load_csv_files():
10
  csv_files = {}
@@ -183,9 +190,266 @@ def search_data(city, search_type, search_query, case_sensitive=False, preserve_
183
  formatted_results += "</div><hr>"
184
 
185
  formatted_results += "</div>"
186
-
187
  return formatted_results
188
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
189
  # Load all CSV files on startup
190
  all_data = load_csv_files()
191
  city_names = list(all_data.keys())
@@ -193,156 +457,385 @@ if not city_names:
193
  city_names = ["No data found"]
194
 
195
  # Create the Gradio interface
196
- with gr.Blocks(title="Query engine") as app:
197
- gr.Markdown("# Archaelogical Query Engine")
198
-
199
- with gr.Row():
200
- with gr.Column():
201
- city_dropdown = gr.Dropdown(
202
- choices=city_names,
203
- value=city_names[0] if city_names else None,
204
- label="Select City"
205
- )
206
 
207
- # Dropdown for queries based on the selected city
208
- query_dropdown = gr.Dropdown(
209
- choices=get_queries_for_city(city_names[0] if city_names else None),
210
- label="Select a Query",
211
- allow_custom_value=True
212
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
213
 
214
- search_type = gr.Radio(
215
- choices=["Simple Text Search", "Regular Expression Search"],
216
- value="Simple Text Search",
217
- label="Search Type"
218
- )
219
 
220
- # Keep a text box for custom queries
221
- search_query = gr.Textbox(
222
- label="Custom Search Query (optional)",
223
- placeholder="Enter custom text to search for..."
224
  )
225
 
226
- case_sensitive = gr.Checkbox(
227
- label="Case Sensitive",
228
- value=False
229
- )
 
 
 
 
 
230
 
231
- show_empty_queries = gr.Checkbox(
232
- label="Show Entries Without Queries",
233
- value=False,
234
- info="Check this to display entries that have empty or missing queries"
235
  )
 
 
 
 
236
 
237
- preserve_order = gr.Checkbox(
238
- label="Preserve Original Dataset Order",
239
- value=True,
240
- info="When checked, results will be displayed in their original order from the dataset. When unchecked, results will be displayed in the order they are found."
241
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
242
 
243
- search_button = gr.Button("Search")
244
-
245
- with gr.Column():
246
- results_text = gr.HTML(
247
- label="Search Results",
248
- value="",
249
- elem_classes=["results-output"]
250
- )
 
 
 
 
 
251
 
252
- stats_text = gr.Textbox(
253
- label="Dataset Statistics",
254
- value=f"Total cities loaded: {len(city_names)}\nCities: {', '.join(city_names)}"
 
255
  )
256
-
257
- # Update the query dropdown when the city changes
258
- def update_queries(city):
259
- return gr.Dropdown(choices=get_queries_for_city(city))
260
-
261
- city_dropdown.change(
262
- fn=update_queries,
263
- inputs=city_dropdown,
264
- outputs=query_dropdown
265
- )
266
-
267
- # Use either the dropdown query or the custom search query
268
- def search_with_queries(city, search_type, query_from_dropdown, custom_query, case_sensitive, show_empty_queries, preserve_order):
269
- if show_empty_queries:
270
- # If show_empty_queries is checked, we show entries without queries
271
- return find_empty_queries(city, preserve_order)
272
- else:
273
- # Otherwise, use the custom query if provided, otherwise use the dropdown selection
274
- final_query = custom_query if custom_query and custom_query.strip() else query_from_dropdown
275
- return search_data(city, search_type, final_query, case_sensitive, preserve_order)
276
-
277
- search_button.click(
278
- fn=search_with_queries,
279
- inputs=[city_dropdown, search_type, query_dropdown, search_query, case_sensitive, show_empty_queries, preserve_order],
280
- outputs=results_text
281
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
282
 
283
  # Launch the app
284
  if __name__ == "__main__":
285
  try:
286
- print("Starting Ancient Cities Query Interface...")
287
  print(f"Loaded {len(city_names)} cities: {', '.join(city_names)}")
288
-
289
- # Add CSS within the Blocks instead of in launch()
290
- with app:
291
- gr.HTML("""
292
- <style>
293
- .gradio-container {
294
- font-family: 'Arial', sans-serif;
295
- }
296
- .results-output {
297
- max-height: 600px;
298
- overflow-y: auto;
299
- padding: 10px;
300
- border: 1px solid #ddd;
301
- border-radius: 5px;
302
- }
303
- a {
304
- color: #007bff;
305
- text-decoration: none;
306
- }
307
- a:hover {
308
- text-decoration: underline;
309
- }
310
- b {
311
- color: #333;
312
- }
313
- .search-results {
314
- font-family: 'Arial', sans-serif;
315
- }
316
- .result-item {
317
- margin-bottom: 15px;
318
- padding: 10px;
319
- background-color: #f9f9f9;
320
- border-radius: 5px;
321
- }
322
- .result-item h3 {
323
- margin-top: 0;
324
- color: #333;
325
- }
326
- .original-index {
327
- font-size: 0.8em;
328
- color: #666;
329
- font-weight: normal;
330
- }
331
- .result-item:nth-child(odd) {
332
- background-color: #f5f5f5;
333
- }
334
- .result-item:nth-child(even) {
335
- background-color: #ffffff;
336
- }
337
- hr {
338
- border: 0;
339
- height: 1px;
340
- background-color: #ddd;
341
- margin: 15px 0;
342
- }
343
- </style>
344
- """)
345
-
346
  app.launch(show_error=True)
347
  except Exception as e:
348
  print(f"Error starting application: {e}")
 
3
  import os
4
  import re
5
  import html
6
+ import time
7
  from pathlib import Path
8
 
9
+ # Import Groq API client
10
+ try:
11
+ from groq import Groq
12
+ except ImportError:
13
+ print("Groq API not installed. Run: pip install groq")
14
+
15
  # Function to load all CSV files from the current directory
16
  def load_csv_files():
17
  csv_files = {}
 
190
  formatted_results += "</div><hr>"
191
 
192
  formatted_results += "</div>"
 
193
  return formatted_results
194
 
195
+ # Function to generate an answer using Groq API for a selected query
196
# Groq-hosted model used for answer generation (also shown in the output footer).
_GROQ_MODEL = "deepseek-r1-distill-llama-70b"

# Keywords that indicate modern tourism/hotel content to deprioritize
_TOURISM_KEYWORDS = (
    'hotel', 'vacation', 'booking', 'resort', 'accommodation', 'travel package',
    'tourism', 'tourist', 'reservation', 'stay', 'room', 'suite', 'spa', 'restaurant',
)


def _rank_entries(data, question, max_sources):
    """Return the best-scoring rows of ``data`` for ``question``.

    Scoring is a simple word-overlap count between the question and the row's
    ``context`` column, minus 0.5 for every tourism keyword found in the row's
    context or URL. Rows whose final score is not positive are dropped.
    """
    # The question does not change per row, so tokenize it once.
    question_words = set(question.lower().split())
    scored = []

    for i, row in data.iterrows():
        context = str(row['context']) if not pd.isna(row['context']) else ""
        url = str(row['url']) if not pd.isna(row['url']) else ""

        context_lower = context.lower()
        url_lower = url.lower()
        tourism_score = sum(
            1 for keyword in _TOURISM_KEYWORDS
            if keyword in context_lower or keyword in url_lower
        )

        overlap = len(question_words.intersection(context_lower.split()))
        if overlap <= 0:
            continue

        # Penalize tourism content; keep the row only if it is still relevant.
        final_score = overlap - (tourism_score * 0.5)
        if final_score > 0:
            scored.append({
                'index': i,
                'score': final_score,
                'url': url,
                'context': context,
                'tourism_score': tourism_score,
            })

    scored.sort(key=lambda item: item['score'], reverse=True)
    return scored[:max_sources]


def _build_prompt(city, question, top_entries):
    """Assemble the user prompt sent to the model from the selected entries."""
    context_for_llm = f"Question about the ancient city of {city}: {question}\n\n"
    context_for_llm += "Information from dataset:\n\n"
    for i, entry in enumerate(top_entries, 1):
        context_for_llm += f"Source {i}: {entry['url']}\n"
        # Only the first 500 characters of each context are sent.
        context_for_llm += f"Context: {entry['context'][:500]}...\n\n"

    return f"""You are an expert historian specializing in ancient cities.
Use the following information to answer the question about the ancient city of {city}.
Base your answer ONLY on the provided information and cite the sources.
If you cannot find relevant information to answer the question, say so honestly.

IMPORTANT: Ignore any information about modern hotels, vacation packages, tourism accommodations, travel bookings, or contemporary tourism services. Focus only on historical, archaeological, and scholarly information about the ancient city.

{context_for_llm}

Answer the question in a comprehensive, detailed, and informative way. Provide as much relevant historical context as possible. Include proper citations to the sources using [Source X] notation.
Question: {question}

First, conduct a thorough analysis of each source - evaluate the information quality, relevance, and historical significance. Skip any sources that only contain information about hotels, vacations, or modern tourism.
Then provide a detailed, well-structured answer with comprehensive explanations and proper citations focused on historical and archaeological content. Include relevant background information, context, and connections to broader historical themes when supported by the sources.

Answer with this structure:
[THINKING]
(Show your detailed analysis of the sources here, noting if any sources are skipped due to being about hotels/tourism. Explain how you're weighing the information and what historical connections you're making.)
[/THINKING]

[ANSWER]
(Your comprehensive, detailed answer with citations, focusing on historical content only. Provide thorough explanations, context, and analysis based on the available sources.)
[/ANSWER]"""


def _split_thinking_and_answer(full_response):
    """Split the raw model text into ``(thinking, answer)``.

    Tries, in order: explicit [THINKING]/[ANSWER] markers, a "Final Answer:"
    heading, "**Analysis of Sources:**"/"**Conclusion:**" headings,
    "Thus,"/"Therefore," transitions, a source-analysis pattern, and finally a
    paragraph split. If no strategy yields an answer, the whole response is
    used as the answer.
    """
    thinking_match = re.search(r'\[THINKING\](.*?)\[/THINKING\]', full_response, re.DOTALL)
    answer_match = re.search(r'\[ANSWER\](.*?)\[/ANSWER\]', full_response, re.DOTALL)

    thinking = ""
    answer = ""

    if thinking_match and answer_match:
        # Case 1: Both markers exist
        thinking = thinking_match.group(1).strip()
        answer = answer_match.group(1).strip()
    elif "Final Answer:" in full_response:
        # Case 2: There's a "Final Answer:" heading
        before, after = full_response.split("Final Answer:", 1)
        thinking = before.strip()
        answer = after.strip()
    elif "**Analysis of Sources:**" in full_response and "**Conclusion:**" in full_response:
        # Case 3: Split at whichever of the two headings comes first
        analysis_start = full_response.find("**Analysis of Sources:**")
        conclusion_start = full_response.find("**Conclusion:**")
        cut = analysis_start if analysis_start < conclusion_start else conclusion_start
        thinking = full_response[:cut].strip()
        answer = full_response[cut:].strip()
    elif "Thus," in full_response and "Therefore," in full_response:
        # Case 4: Split at the last natural-language transition
        thinking_end = max(full_response.rfind("Thus,"), full_response.rfind("Therefore,"))
        if thinking_end > 0:
            thinking = full_response[:thinking_end].strip()
            answer = full_response[thinking_end:].strip()
    elif "Starting with Source" in full_response or "Source 1" in full_response:
        # Case 5: Look for where detailed source analysis ends and the answer begins
        patterns = [
            r"\n\n(?:To address|Based on|In conclusion|The answer|Therefore,|Thus,)",
            r"\n\n\*\*.*?\*\*",  # Bold headings that might start the answer
            r"\n\nGiven the",
            r"\n\nFrom the",
        ]

        split_point = -1
        for pattern in patterns:
            matches = list(re.finditer(pattern, full_response, re.IGNORECASE))
            if matches:
                # Take the last match of the first pattern that matches at all
                split_point = matches[-1].start()
                break

        if split_point > 0:
            thinking = full_response[:split_point].strip()
            answer = full_response[split_point:].strip()
        else:
            # Fallback: split at the first paragraph that doesn't start with "Source"
            parts = re.split(r'\n\n(?![Ss]ource)', full_response, maxsplit=1)
            if len(parts) > 1 and len(parts[1]) > 100:  # Second part must be substantial
                thinking = parts[0].strip()
                answer = parts[1].strip()
            else:
                thinking = "Source analysis integrated with response."
                answer = full_response
    else:
        # Case 6: Split at the first paragraph that doesn't start with "Source"
        parts = re.split(r'\n\n(?![Ss]ource)', full_response, maxsplit=1)
        if len(parts) > 1 and len(parts[1]) > 50:  # Second part must be substantial
            thinking = parts[0].strip()
            answer = parts[1].strip()
        else:
            # Case 7: Default - use the whole response as the answer
            thinking = "Analysis not clearly separated in the model's response."
            answer = full_response

    # Never hand back an empty answer when the model did produce text.
    if not answer:
        answer = full_response.strip()

    return thinking, answer


def _render_answer_html(answer, thinking, top_entries, completion_time):
    """Render the answer, collapsible analysis and source list as an HTML string."""
    # The model text is untrusted (it is derived from scraped contexts), so it
    # is escaped BEFORE any markup of our own is inserted.
    formatted_answer = html.escape(answer)
    formatted_answer = re.sub(
        r'\[Source (\d+)\]',
        r'<span class="citation">[Source \1]</span>',
        formatted_answer,
    )
    formatted_answer = formatted_answer.replace("\n\n", "</p><p>")
    formatted_answer = f"<p>{formatted_answer}</p>"

    formatted_thinking = html.escape(thinking)
    # Highlight every "Source N:" label, whatever the number of sources.
    formatted_thinking = re.sub(
        r'Source (\d+):',
        r"<span class='source-highlight'>Source \1:</span>",
        formatted_thinking,
    )
    formatted_thinking = formatted_thinking.replace("\n\n", "</p><p>")
    formatted_thinking = f"<p>{formatted_thinking}</p>"

    parts = [
        "<div class='llm-answer'>",
        # Main answer section first (most prominent)
        "<div class='answer-section'>",
        "<h3>Answer:</h3>",
        f"<div class='answer-content'>{formatted_answer}</div>",
        "</div>",
        # Collapsible thinking section
        "<div class='thinking-section'>",
        """
<details class='thinking-details'>
<summary class='thinking-summary'>
<span class='thinking-icon'>🔍</span>
<span class='thinking-title'>Show Analysis Process</span>
<span class='thinking-chevron'>▼</span>
</summary>
<div class='thinking-content-wrapper'>
""",
        f"<div class='thinking-content'>{formatted_thinking}</div>",
        "</div></details></div>",
        # Source references at the bottom
        "<div class='sources'><h4>Sources:</h4><ul>",
    ]
    for i, entry in enumerate(top_entries, 1):
        url_safe = html.escape(entry['url'])
        parts.append(
            f"<li>[Source {i}]: <a href='{url_safe}' target='_blank'>{url_safe}</a></li>"
        )
    parts.append("</ul></div>")
    parts.append(
        f"<p class='model-info'><small>Generated using {_GROQ_MODEL} "
        f"in {completion_time:.2f} seconds</small></p></div>"
    )
    return "".join(parts)


def generate_answer_with_groq(city, question, max_sources=3, api_key=None, temperature=0.3):
    """Answer ``question`` about ``city`` with the Groq API and return display text.

    Args:
        city: Key into the module-level ``all_data`` mapping.
        question: The user's question; matched against each row's ``context``.
        max_sources: Maximum number of dataset rows passed to the model.
            Coerced with ``int()`` so numeric slider values are accepted.
        api_key: Groq API key as a string. Anything that is not a non-blank
            string is treated as "no key provided".
        temperature: Sampling temperature forwarded to the API call.

    Returns:
        An HTML string with the answer, the analysis and the sources on
        success; otherwise a plain error/notice message. Failures are reported
        through the return value rather than raised.
    """
    if not isinstance(api_key, str) or not api_key.strip():
        return "Error: Groq API key not provided. Please enter your API key in the field above."

    # Try to initialize the Groq client with the provided API key
    try:
        client = Groq(api_key=api_key.strip())
    except Exception as e:
        return f"Error initializing Groq client: {str(e)}"

    data = all_data.get(city)
    if data is None:
        return "City data not found"

    # Simple relevance ranking; see _rank_entries for the scoring rule.
    top_entries = _rank_entries(data, question, int(max_sources))
    if not top_entries:
        return f"No relevant information found in the {city} dataset for this question."

    prompt = _build_prompt(city, question, top_entries)

    try:
        start_time = time.perf_counter()
        response = client.chat.completions.create(
            model=_GROQ_MODEL,
            messages=[
                {"role": "system", "content": "You are an expert historian specializing in ancient cities."},
                {"role": "user", "content": prompt},
            ],
            temperature=temperature,
            max_tokens=4000,  # Increased for longer, more comprehensive answers
            top_p=0.9,
        )
        completion_time = time.perf_counter() - start_time

        full_response = response.choices[0].message.content
        if not full_response:
            return "Error generating answer: the model returned an empty response."

        thinking, answer = _split_thinking_and_answer(full_response)
        return _render_answer_html(answer, thinking, top_entries, completion_time)

    except Exception as e:
        return f"Error generating answer: {str(e)}"
452
+
453
  # Load all CSV files on startup
454
  all_data = load_csv_files()
455
  city_names = list(all_data.keys())
 
457
  city_names = ["No data found"]
458
 
459
  # Create the Gradio interface
460
+ with gr.Blocks(title="Archaeological Query Engine") as app:
461
+
462
+ # Add tabs - make sure there's only one top-level Tabs component
463
+ with gr.Tabs() as tabs:
464
+ with gr.TabItem("Search Dataset"):
465
+ gr.Markdown("Search through information about ancient cities from CSV files.")
 
 
 
 
466
 
467
+ with gr.Row():
468
+ with gr.Column():
469
+ city_dropdown = gr.Dropdown(
470
+ choices=city_names,
471
+ value=city_names[0] if city_names else None,
472
+ label="Select City"
473
+ )
474
+
475
+ # Dropdown for queries based on the selected city
476
+ query_dropdown = gr.Dropdown(
477
+ choices=get_queries_for_city(city_names[0] if city_names else None),
478
+ label="Select a Query",
479
+ allow_custom_value=True
480
+ )
481
+
482
+ search_type = gr.Radio(
483
+ choices=["Simple Text Search", "Regular Expression Search"],
484
+ value="Simple Text Search",
485
+ label="Search Type"
486
+ )
487
+
488
+ # Keep a text box for custom queries
489
+ search_query = gr.Textbox(
490
+ label="Custom Search Query (optional)",
491
+ placeholder="Enter custom text to search for..."
492
+ )
493
+
494
+ case_sensitive = gr.Checkbox(
495
+ label="Case Sensitive",
496
+ value=False
497
+ )
498
+
499
+ show_empty_queries = gr.Checkbox(
500
+ label="Show Entries Without Queries",
501
+ value=False,
502
+ info="Check this to display entries that have empty or missing queries"
503
+ )
504
+
505
+ preserve_order = gr.Checkbox(
506
+ label="Preserve Original Dataset Order",
507
+ value=True,
508
+ info="When checked, results will be displayed in their original order from the dataset. When unchecked, results will be displayed in the order they are found."
509
+ )
510
+
511
+ search_button = gr.Button("Search")
512
+
513
+ with gr.Column():
514
+ results_text = gr.HTML(
515
+ label="Search Results",
516
+ value="",
517
+ elem_classes=["results-output"]
518
+ )
519
+
520
+ stats_text = gr.Textbox(
521
+ label="Dataset Statistics",
522
+ value=f"Total cities loaded: {len(city_names)}\nCities: {', '.join(city_names)}"
523
+ )
524
 
525
+ # Update the query dropdown when the city changes
526
def update_queries(city):
    """Return a Dropdown whose choices are the queries available for ``city``."""
    available_queries = get_queries_for_city(city)
    refreshed_dropdown = gr.Dropdown(choices=available_queries)
    return refreshed_dropdown
 
 
528
 
529
+ city_dropdown.change(
530
+ fn=update_queries,
531
+ inputs=city_dropdown,
532
+ outputs=query_dropdown
533
  )
534
 
535
+ # Use either the dropdown query or the custom search query
536
def search_with_queries(city, search_type, query_from_dropdown, custom_query, case_sensitive, show_empty_queries, preserve_order):
    """Run the search selected in the UI and return its result.

    When ``show_empty_queries`` is set, the entries without queries are
    returned via ``find_empty_queries``. Otherwise ``search_data`` is called
    with the custom query if it contains non-whitespace text, and with the
    dropdown selection if it does not.
    """
    # Guard clause: the "empty queries" view ignores every other search option.
    if show_empty_queries:
        return find_empty_queries(city, preserve_order)

    has_custom_text = bool(custom_query and custom_query.strip())
    if has_custom_text:
        chosen_query = custom_query
    else:
        chosen_query = query_from_dropdown

    return search_data(city, search_type, chosen_query, case_sensitive, preserve_order)
544
 
545
+ search_button.click(
546
+ fn=search_with_queries,
547
+ inputs=[city_dropdown, search_type, query_dropdown, search_query, case_sensitive, show_empty_queries, preserve_order],
548
+ outputs=results_text
549
  )
550
+
551
+ # Add new tab for AI-generated answers using Groq API
552
+ with gr.TabItem("AI Answers (Groq API)"):
553
+ gr.Markdown("Ask questions about the dataset and get AI-generated answers using the Groq API with the deepseek-r1-distill-llama-70b model.")
554
 
555
+ with gr.Row():
556
+ with gr.Column():
557
+ # API key is now hardcoded in the code
558
+
559
+ ai_city_dropdown = gr.Dropdown(
560
+ choices=city_names,
561
+ value=city_names[0] if city_names else None,
562
+ label="Select City"
563
+ )
564
+
565
+ question_input = gr.Textbox(
566
+ label="Ask a Question",
567
+ placeholder="E.g., What was the historical significance of this ancient city?",
568
+ lines=3
569
+ )
570
+
571
+ max_sources_slider = gr.Slider(
572
+ minimum=1,
573
+ maximum=10,
574
+ value=3,
575
+ step=1,
576
+ label="Maximum Number of Sources to Consider",
577
+ info="Higher values may provide more comprehensive answers but will take longer"
578
+ )
579
+
580
+ temperature_slider = gr.Slider(
581
+ minimum=0.0,
582
+ maximum=1.0,
583
+ value=0.3,
584
+ step=0.1,
585
+ label="Temperature",
586
+ info="Lower values create more focused answers, higher values create more creative ones"
587
+ )
588
+
589
+ generate_button = gr.Button("Generate Answer")
590
+
591
+ with gr.Column():
592
+ answer_output = gr.HTML(
593
+ label="AI-Generated Answer",
594
+ value="",
595
+ elem_classes=["results-output"]
596
+ )
597
 
598
+ # Function to handle the Generate Answer button click
599
def on_generate_answer(city, question, max_sources, api_key, temperature):
    """Handle the "Generate Answer" click and return text for the output panel.

    Args:
        city: Selected city name.
        question: Question typed by the user; blank input is rejected here.
        max_sources: Maximum number of dataset entries to pass to the model.
        api_key: Key supplied through the UI input; used only as a fallback
            when the ``GROQ_API`` environment variable is unset or empty.
        temperature: Sampling temperature for the model.

    Returns:
        Whatever ``generate_answer_with_groq`` returns, or a plain message when
        the question is blank or an unexpected error occurs.
    """
    if not question or not question.strip():
        return "Please enter a question to generate an answer."

    # generate_answer_with_groq expects the key STRING and builds the client
    # itself, so pass the key through rather than a Groq client object.
    groq_api_key = os.environ.get("GROQ_API") or api_key or ""

    try:
        return generate_answer_with_groq(city, question, max_sources, groq_api_key, temperature)
    except Exception as e:
        return f"Error: {str(e)}"
611
 
612
+ generate_button.click(
613
+ fn=on_generate_answer,
614
+ inputs=[ai_city_dropdown, question_input, max_sources_slider, gr.Textbox(visible=False), temperature_slider],
615
+ outputs=answer_output
616
  )
617
+
618
+ # Add CSS styling
619
+ gr.HTML("""
620
+ <style>
621
+ .gradio-container {
622
+ font-family: 'Segoe UI', 'Arial', sans-serif;
623
+ }
624
+ .results-output {
625
+ max-height: 600px;
626
+ overflow-y: auto;
627
+ padding: 15px;
628
+ border: 1px solid #e2e8f0;
629
+ border-radius: 8px;
630
+ box-shadow: 0 2px 5px rgba(0,0,0,0.05);
631
+ }
632
+ a {
633
+ color: #3182ce;
634
+ text-decoration: none;
635
+ transition: color 0.2s;
636
+ }
637
+ a:hover {
638
+ text-decoration: underline;
639
+ color: #2c5282;
640
+ }
641
+ b {
642
+ color: #2d3748;
643
+ }
644
+ .search-results {
645
+ font-family: 'Segoe UI', 'Arial', sans-serif;
646
+ }
647
+ .result-item {
648
+ margin-bottom: 18px;
649
+ padding: 15px;
650
+ background-color: #f9f9f9;
651
+ border-radius: 8px;
652
+ box-shadow: 0 1px 3px rgba(0,0,0,0.05);
653
+ transition: transform 0.2s, box-shadow 0.2s;
654
+ }
655
+ .result-item:hover {
656
+ transform: translateY(-2px);
657
+ box-shadow: 0 4px 6px rgba(0,0,0,0.1);
658
+ }
659
+ .original-index {
660
+ font-size: 0.8em;
661
+ color: #718096;
662
+ font-weight: normal;
663
+ }
664
+ .result-item h3 {
665
+ margin-top: 0;
666
+ color: #2d3748;
667
+ font-weight: 600;
668
+ }
669
+ .result-item:nth-child(odd) {
670
+ background-color: #f5f7fa;
671
+ }
672
+ .result-item:nth-child(even) {
673
+ background-color: #ffffff;
674
+ }
675
+ hr {
676
+ border: 0;
677
+ height: 1px;
678
+ background-color: #e2e8f0;
679
+ margin: 20px 0;
680
+ }
681
+ .llm-answer {
682
+ font-family: 'Segoe UI', 'Arial', sans-serif;
683
+ line-height: 1.7;
684
+ padding: 20px;
685
+ border-radius: 12px;
686
+ background-color: #f8fafc;
687
+ box-shadow: 0 4px 12px rgba(0,0,0,0.08);
688
+ border: 1px solid #cbd5e0;
689
+ margin: 10px 0;
690
+ }
691
+ .answer-section {
692
+ margin-bottom: 25px;
693
+ background-color: #ffffff;
694
+ padding: 25px;
695
+ border-radius: 10px;
696
+ border: 1px solid #e2e8f0;
697
+ box-shadow: 0 2px 8px rgba(0,0,0,0.04);
698
+ }
699
+ .answer-section h3 {
700
+ margin-top: 0;
701
+ color: #1a202c;
702
+ font-weight: 700;
703
+ font-size: 1.3em;
704
+ margin-bottom: 20px;
705
+ display: flex;
706
+ align-items: center;
707
+ }
708
+ .answer-section h3::before {
709
+ content: "💡";
710
+ margin-right: 10px;
711
+ font-size: 1.1em;
712
+ }
713
+ .answer-content {
714
+ font-size: 1.05em;
715
+ line-height: 1.8;
716
+ color: #2d3748;
717
+ }
718
+ .answer-content p {
719
+ margin-bottom: 16px;
720
+ }
721
+ .answer-content .citation {
722
+ font-weight: 600;
723
+ color: #3182ce;
724
+ background-color: #ebf8ff;
725
+ padding: 2px 8px;
726
+ border-radius: 6px;
727
+ font-size: 0.9em;
728
+ border: 1px solid #bee3f8;
729
+ }
730
+ .thinking-section {
731
+ margin-bottom: 20px;
732
+ }
733
+ .thinking-details {
734
+ background-color: #f7fafc;
735
+ border: 1px solid #e2e8f0;
736
+ border-radius: 8px;
737
+ overflow: hidden;
738
+ }
739
+ .thinking-summary {
740
+ cursor: pointer;
741
+ padding: 15px 20px;
742
+ background-color: #edf2f7;
743
+ border-bottom: 1px solid #e2e8f0;
744
+ display: flex;
745
+ align-items: center;
746
+ font-weight: 600;
747
+ color: #4a5568;
748
+ transition: background-color 0.2s ease;
749
+ user-select: none;
750
+ }
751
+ .thinking-summary:hover {
752
+ background-color: #e2e8f0;
753
+ }
754
+ .thinking-icon {
755
+ margin-right: 10px;
756
+ font-size: 1.1em;
757
+ }
758
+ .thinking-title {
759
+ flex-grow: 1;
760
+ font-size: 0.95em;
761
+ }
762
+ .thinking-chevron {
763
+ font-size: 0.8em;
764
+ transition: transform 0.3s ease;
765
+ margin-left: 10px;
766
+ }
767
+ .thinking-details[open] .thinking-chevron {
768
+ transform: rotate(180deg);
769
+ }
770
+ .thinking-content-wrapper {
771
+ padding: 0;
772
+ }
773
+ .thinking-content {
774
+ background-color: #f0f4f8;
775
+ padding: 20px;
776
+ margin: 0;
777
+ font-size: 0.93em;
778
+ line-height: 1.6;
779
+ color: #4a5568;
780
+ }
781
+ .thinking-content p {
782
+ margin-bottom: 12px;
783
+ }
784
+ .thinking-content .source-highlight {
785
+ font-weight: 600;
786
+ color: #2b6cb0;
787
+ background-color: #ebf4ff;
788
+ padding: 2px 6px;
789
+ border-radius: 4px;
790
+ border: 1px solid #bee3f8;
791
+ }
792
+ .sources {
793
+ font-size: 0.95em;
794
+ margin-top: 25px;
795
+ padding: 20px;
796
+ background-color: #ffffff;
797
+ border-radius: 8px;
798
+ border: 1px solid #e2e8f0;
799
+ color: #4a5568;
800
+ }
801
+ .sources h4 {
802
+ margin-top: 0;
803
+ color: #2d3748;
804
+ font-weight: 600;
805
+ font-size: 1.05em;
806
+ margin-bottom: 15px;
807
+ display: flex;
808
+ align-items: center;
809
+ }
810
+ .sources h4::before {
811
+ content: "📚";
812
+ margin-right: 8px;
813
+ font-size: 1em;
814
+ }
815
+ .sources ul {
816
+ padding-left: 20px;
817
+ margin: 0;
818
+ }
819
+ .sources li {
820
+ margin-bottom: 8px;
821
+ line-height: 1.5;
822
+ }
823
+ .model-info {
824
+ text-align: right;
825
+ color: #718096;
826
+ margin-top: 20px;
827
+ margin-bottom: 0;
828
+ font-size: 0.85em;
829
+ padding-top: 15px;
830
+ border-top: 1px solid #e2e8f0;
831
+ }
832
+ </style>
833
+ """)
834
 
835
  # Launch the app
836
# Script entry point: runs only when this file is executed directly.
if __name__ == "__main__":
    try:
        # Report the module-level city list before the interface is started.
        print(f"Loaded {len(city_names)} cities: {', '.join(city_names)}")
        # Start the Gradio Blocks app defined above as `app`.
        app.launch(show_error=True)
    except Exception as e:
        # Top-level boundary: print the startup failure instead of a traceback.
        print(f"Error starting application: {e}")