rdune71 committed on
Commit
8795165
·
verified ·
1 Parent(s): 84fd95c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -81
app.py CHANGED
@@ -74,7 +74,7 @@ class RateLimiter:
74
  self.max_calls = max_calls
75
  self.time_window = time_window
76
  self.calls = []
77
-
78
  def is_allowed(self):
79
  now = time.time()
80
  self.calls = [call for call in self.calls if now - call < self.time_window]
@@ -90,30 +90,7 @@ feedback_data = []
90
 
91
  def get_preloaded_context():
92
  """Get preloaded context information"""
93
- context = f"""{FORMATTED_DATE_TIME}
94
- System Information: You are an AI assistant with access to current information through web search and academic research tools.
95
- Always provide sources for factual information.
96
-
97
- Available APIs:
98
- - Web Search (Tavily)
99
- - Academic Research (arXiv, Semantic Scholar)
100
- - PDF Document Analysis
101
- - Web Page Content Extraction
102
-
103
- Specialized Features:
104
- - Research-focused queries automatically processed
105
- - Academic paper analysis and summarization
106
- - Literature review generation
107
- - Citation management and bibliography creation
108
-
109
- Response Guidelines:
110
- 1. After completing your analysis, ALWAYS end with either:
111
- '[ANALYSIS COMPLETE]' - when you've fully addressed the query
112
- '[FURTHER RESEARCH NEEDED]' - when additional investigation would be beneficial
113
-
114
- 2. For search results, provide clear synthesis rather than just listing findings
115
- 3. Include specific citations and sources where applicable
116
- 4. Structure complex answers with clear sections when appropriate"""
117
  return context
118
 
119
  def clean_query_for_current_info(query):
@@ -128,8 +105,8 @@ def clean_query_for_current_info(query):
128
  def determine_research_content_type(query):
129
  """Determine if query requires research-focused search"""
130
  research_keywords = [
131
- 'research', 'study', 'paper', 'academic', 'scientific',
132
- 'experiment', 'findings', 'discovery', 'theory',
133
  'hypothesis', 'methodology', 'conclusion', 'literature',
134
  'peer reviewed', 'scholarly', 'journal', 'publication',
135
  'analyze', 'investigate', 'examine', 'review'
@@ -226,7 +203,7 @@ def truncate_history(messages, max_tokens=4000):
226
 
227
  if current_tokens + message_tokens > max_tokens:
228
  break
229
-
230
  truncated.insert(0, message)
231
  current_tokens += message_tokens
232
 
@@ -241,10 +218,16 @@ def manage_conversation_memory(messages, max_turns=10):
241
  return system_msg + recent_messages if system_msg else recent_messages
242
  return messages
243
 
244
- # Clean query for current info
245
- clean_query = clean_query_for_current_info(query)
 
 
 
 
 
 
246
 
247
- if not clean_query:
248
  return "No valid search query provided."
249
 
250
  response = tavily_client.search(
@@ -295,7 +278,7 @@ def download_and_extract_pdf(url):
295
 
296
  full_text = "\n".join(text_content)
297
  return f"PDF CONTENT EXTRACTED FROM {url}:\n{full_text[:4000]}..." # Limit size
298
-
299
  except Exception as e:
300
  return f"PDF extraction error: {str(e)}"
301
 
@@ -329,7 +312,7 @@ def scrape_web_page(url):
329
  content = ' '.join([p.get_text().strip() for p in paragraphs[:30] if p.get_text().strip()])
330
 
331
  return f"WEB PAGE CONTENT FROM {url}:\nTitle: {title}\nContent: {content[:3000]}..." # Limit content size
332
-
333
  except Exception as e:
334
  return f"Error scraping page: {str(e)}"
335
 
@@ -360,7 +343,7 @@ def arxiv_search(query):
360
  return "\n\n---\n\n".join(results)
361
  else:
362
  return "No arXiv papers found for this query."
363
-
364
  except Exception as e:
365
  return f"arXiv search error: {str(e)}"
366
 
@@ -400,7 +383,7 @@ def semantic_scholar_search(query):
400
  return "\n\n---\n\n".join(results)
401
  else:
402
  return "No Semantic Scholar papers found for this query."
403
-
404
  except Exception as e:
405
  return f"Semantic Scholar search error: {str(e)}"
406
 
@@ -412,7 +395,7 @@ def comprehensive_research(query):
412
  results.append(f"COMPREHENSIVE RESEARCH RESULTS FOR: '{query}'\n" + "="*50)
413
 
414
  # Academic databases
415
- if TAVILY_AVAILABLE:
416
  tavily_result = tavily_search(query)
417
  results.append(f"TAVILY ACADEMIC SEARCH RESULTS:\n{tavily_result}")
418
 
@@ -433,25 +416,10 @@ def comprehensive_research(query):
433
 
434
  return "\n\n---\n\n".join(results)
435
 
436
- def perform_search(query):
437
- """Perform search using Tavily"""
438
- if TAVILY_AVAILABLE and tavily_client:
439
- web_result = tavily_search(query)
440
- return f"[SEARCH RESULTS FOR '{query}']:\nSource: Web Search\n{web_result}"
441
- else:
442
- return "Web search not available."
443
  def analyze_search_results(query, search_results):
444
  """Create a prompt for the model to analyze search results"""
445
- analysis_prompt = f"""Based on the search results below, please answer the original question: "{query}"
446
-
447
- Search Results: {search_results}
448
-
449
- Please provide a clear, concise answer based on these sources. Include specific names, facts, and cite the sources where possible. Do not mention that you are analyzing search results - just provide the answer directly.
450
-
451
- Structure your response thoughtfully and when you complete your analysis, please explicitly state '[ANALYSIS COMPLETE]' at the end if you have fully addressed the query and have no further input.
452
-
453
- If additional research or clarification would be beneficial, please state '[FURTHER RESEARCH NEEDED]'."""
454
-
455
  return analysis_prompt
456
 
457
  def generate_bibliography(search_results):
@@ -491,21 +459,7 @@ def generate_bibliography(search_results):
491
 
492
  def generate_literature_review(topic, search_results):
493
  """Generate structured literature review from search results"""
494
- prompt = f"""Based on the following research on '{topic}', create a structured literature review:
495
-
496
- {search_results}
497
-
498
- Please organize your response as follows:
499
- 1. INTRODUCTION: Brief overview of the topic
500
- 2. KEY FINDINGS: Major discoveries and insights from the research
501
- 3. METHODOLOGIES: Common research approaches used
502
- 4. LIMITATIONS: Identified gaps or limitations in current research
503
- 5. FUTURE DIRECTIONS: Suggested areas for future investigation
504
- 6. CONCLUSION: Summary of the current state of research
505
-
506
- Format your response clearly with these section headings.
507
-
508
- When you complete your analysis, please explicitly state '[ANALYSIS COMPLETE]' at the end."""
509
 
510
  return prompt
511
 
@@ -548,7 +502,7 @@ def check_analysis_status(content):
548
 
549
  # Check if this appears to be a final answer/response
550
  elif any(phrase in content.lower() for phrase in [
551
- "in conclusion", "to summarize", "in summary",
552
  "overall", "therefore", "thus", "in closing"
553
  ]):
554
  return "✅ AI appears to be concluding its response."
@@ -563,7 +517,7 @@ def check_analysis_status(content):
563
 
564
  # Check for literature review generation
565
  elif "LITERATURE REVIEW" in content and any(header in content for header in [
566
- "INTRODUCTION", "KEY FINDINGS", "METHODOLOGIES",
567
  "LIMITATIONS", "FUTURE DIRECTIONS", "CONCLUSION"
568
  ]):
569
  return "📑 Literature review structured - comprehensive analysis provided."
@@ -627,13 +581,13 @@ def generate_with_streaming(messages, model, max_tokens=8192, temperature=0.7, t
627
  continue
628
  else:
629
  yield f"Error: {response.status_code} - {response.text}"
630
-
631
  except Exception as e:
632
  yield f"Connection error: {str(e)}"
633
  finally:
634
  end_time = time.time()
635
  # Track usage (simplified)
636
- track_usage("user123", str(messages[-1]) if messages else "",
637
  end_time - start_time, len(str(messages)))
638
 
639
  def format_code_blocks(text):
@@ -684,7 +638,7 @@ def collect_feedback(feedback, query, response):
684
  @lru_cache(maxsize=100)
685
  def cached_search(query):
686
  """Cache frequent searches"""
687
- return perform_search(query)
688
 
689
  def handle_api_failure(error_type, fallback_strategy="retry"):
690
  """Handle different types of API failures gracefully"""
@@ -695,7 +649,7 @@ def export_conversation(chat_history, export_format):
695
  """Export conversation in various formats"""
696
  if not chat_history:
697
  return "No conversation to export"
698
-
699
  if export_format == "JSON":
700
  # Filter out system messages for export
701
  exportable_history = [msg for msg in chat_history if msg[0] != "system"]
@@ -731,8 +685,8 @@ def process_url_content(url):
731
  else:
732
  return scrape_web_page(url)
733
 
734
- def respond(message, chat_history, model_choice, max_tokens, temperature, top_p,
735
- creativity, precision, system_prompt, use_web_search, research_mode, theme):
736
  """Main response handler with conversation history"""
737
  if not message:
738
  yield "", chat_history, "", gr.update(choices=[], visible=False), "", "💬 Ready for your query"
@@ -808,7 +762,7 @@ def respond(message, chat_history, model_choice, max_tokens, temperature, top_p,
808
  bibliography = generate_bibliography(search_result)
809
  analysis_status = "📚 Comprehensive research completed"
810
  else:
811
- search_result = perform_search(message)
812
  bibliography = ""
813
  analysis_status = "📊 Search results retrieved"
814
 
@@ -859,7 +813,7 @@ def respond(message, chat_history, model_choice, max_tokens, temperature, top_p,
859
  # Break infinite loops
860
  if is_looping_content(full_response):
861
  # Force search instead of looping
862
- search_result = perform_search(message)
863
  follow_ups = generate_follow_up_questions(search_result)
864
  analysis_status = "⚠️ Loop detected - performing search instead"
865
  # Convert back to Gradio format
@@ -875,7 +829,7 @@ def respond(message, chat_history, model_choice, max_tokens, temperature, top_p,
875
  # Check for tool calls after completion or break loops
876
  if is_looping_content(full_response):
877
  # Force search for looping content
878
- search_result = perform_search(message)
879
  follow_ups = generate_follow_up_questions(search_result)
880
  analysis_status = "⚠️ Loop detected - performing search instead"
881
  # Convert back to Gradio format
@@ -949,14 +903,14 @@ with gr.Blocks(title="GPT-OSS Research Assistant") as demo:
949
  clear = gr.Button("Clear")
950
  theme_toggle = gr.Radio(choices=["Light", "Dark"], value="Light", label="Theme")
951
  feedback_radio = gr.Radio(
952
- choices=["👍 Helpful", "👎 Not Helpful", "🔄 Needs Improvement"],
953
  label="Rate Last Response"
954
  )
955
 
956
  with gr.Row():
957
  with gr.Column():
958
  follow_up_questions = gr.Radio(
959
- choices=[],
960
  label="Suggested Follow-up Questions",
961
  visible=False
962
  )
 
74
  self.max_calls = max_calls
75
  self.time_window = time_window
76
  self.calls = []
77
+
78
  def is_allowed(self):
79
  now = time.time()
80
  self.calls = [call for call in self.calls if now - call < self.time_window]
 
90
 
91
  def get_preloaded_context():
92
  """Get preloaded context information"""
93
+ context = f"""{FORMATTED_DATE_TIME} System Information: You are an AI assistant with access to current information through web search and academic research tools. Always provide sources for factual information. Available APIs: - Web Search (Tavily) - Academic Research (arXiv, Semantic Scholar) - PDF Document Analysis - Web Page Content Extraction Specialized Features: - Research-focused queries automatically processed - Academic paper analysis and summarization - Literature review generation - Citation management and bibliography creation Response Guidelines: 1. After completing your analysis, ALWAYS end with either: '[ANALYSIS COMPLETE]' - when you've fully addressed the query '[FURTHER RESEARCH NEEDED]' - when additional investigation would be beneficial 2. For search results, provide clear synthesis rather than just listing findings 3. Include specific citations and sources where applicable 4. Structure complex answers with clear sections when appropriate"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  return context
95
 
96
  def clean_query_for_current_info(query):
 
105
  def determine_research_content_type(query):
106
  """Determine if query requires research-focused search"""
107
  research_keywords = [
108
+ 'research', 'study', 'paper', 'academic', 'scientific',
109
+ 'experiment', 'findings', 'discovery', 'theory',
110
  'hypothesis', 'methodology', 'conclusion', 'literature',
111
  'peer reviewed', 'scholarly', 'journal', 'publication',
112
  'analyze', 'investigate', 'examine', 'review'
 
203
 
204
  if current_tokens + message_tokens > max_tokens:
205
  break
206
+
207
  truncated.insert(0, message)
208
  current_tokens += message_tokens
209
 
 
218
  return system_msg + recent_messages if system_msg else recent_messages
219
  return messages
220
 
221
+ def tavily_search(query):
222
+ """Perform search using Tavily"""
223
+ if not TAVILY_AVAILABLE or not tavily_client:
224
+ return "Web search not available."
225
+
226
+ try:
227
+ # Clean query for current info
228
+ clean_query = clean_query_for_current_info(query)
229
 
230
+ if not clean_query:
231
  return "No valid search query provided."
232
 
233
  response = tavily_client.search(
 
278
 
279
  full_text = "\n".join(text_content)
280
  return f"PDF CONTENT EXTRACTED FROM {url}:\n{full_text[:4000]}..." # Limit size
281
+
282
  except Exception as e:
283
  return f"PDF extraction error: {str(e)}"
284
 
 
312
  content = ' '.join([p.get_text().strip() for p in paragraphs[:30] if p.get_text().strip()])
313
 
314
  return f"WEB PAGE CONTENT FROM {url}:\nTitle: {title}\nContent: {content[:3000]}..." # Limit content size
315
+
316
  except Exception as e:
317
  return f"Error scraping page: {str(e)}"
318
 
 
343
  return "\n\n---\n\n".join(results)
344
  else:
345
  return "No arXiv papers found for this query."
346
+
347
  except Exception as e:
348
  return f"arXiv search error: {str(e)}"
349
 
 
383
  return "\n\n---\n\n".join(results)
384
  else:
385
  return "No Semantic Scholar papers found for this query."
386
+
387
  except Exception as e:
388
  return f"Semantic Scholar search error: {str(e)}"
389
 
 
395
  results.append(f"COMPREHENSIVE RESEARCH RESULTS FOR: '{query}'\n" + "="*50)
396
 
397
  # Academic databases
398
+ if TAVILY_AVAILABLE and tavily_client:
399
  tavily_result = tavily_search(query)
400
  results.append(f"TAVILY ACADEMIC SEARCH RESULTS:\n{tavily_result}")
401
 
 
416
 
417
  return "\n\n---\n\n".join(results)
418
 
 
 
 
 
 
 
 
419
  def analyze_search_results(query, search_results):
420
  """Create a prompt for the model to analyze search results"""
421
+ analysis_prompt = f"""Based on the search results below, please answer the original question: "{query}" Search Results: {search_results} Please provide a clear, concise answer based on these sources. Include specific names, facts, and cite the sources where possible. Do not mention that you are analyzing search results - just provide the answer directly. Structure your response thoughtfully and when you complete your analysis, please explicitly state '[ANALYSIS COMPLETE]' at the end if you have fully addressed the query and have no further input. If additional research or clarification would be beneficial, please state '[FURTHER RESEARCH NEEDED]'."""
422
+
 
 
 
 
 
 
 
 
423
  return analysis_prompt
424
 
425
  def generate_bibliography(search_results):
 
459
 
460
  def generate_literature_review(topic, search_results):
461
  """Generate structured literature review from search results"""
462
+ prompt = f"""Based on the following research on '{topic}', create a structured literature review: {search_results} Please organize your response as follows: 1. INTRODUCTION: Brief overview of the topic 2. KEY FINDINGS: Major discoveries and insights from the research 3. METHODOLOGIES: Common research approaches used 4. LIMITATIONS: Identified gaps or limitations in current research 5. FUTURE DIRECTIONS: Suggested areas for future investigation 6. CONCLUSION: Summary of the current state of research Format your response clearly with these section headings. When you complete your analysis, please explicitly state '[ANALYSIS COMPLETE]' at the end."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
463
 
464
  return prompt
465
 
 
502
 
503
  # Check if this appears to be a final answer/response
504
  elif any(phrase in content.lower() for phrase in [
505
+ "in conclusion", "to summarize", "in summary",
506
  "overall", "therefore", "thus", "in closing"
507
  ]):
508
  return "✅ AI appears to be concluding its response."
 
517
 
518
  # Check for literature review generation
519
  elif "LITERATURE REVIEW" in content and any(header in content for header in [
520
+ "INTRODUCTION", "KEY FINDINGS", "METHODOLOGIES",
521
  "LIMITATIONS", "FUTURE DIRECTIONS", "CONCLUSION"
522
  ]):
523
  return "📑 Literature review structured - comprehensive analysis provided."
 
581
  continue
582
  else:
583
  yield f"Error: {response.status_code} - {response.text}"
584
+
585
  except Exception as e:
586
  yield f"Connection error: {str(e)}"
587
  finally:
588
  end_time = time.time()
589
  # Track usage (simplified)
590
+ track_usage("user123", str(messages[-1]) if messages else "",
591
  end_time - start_time, len(str(messages)))
592
 
593
  def format_code_blocks(text):
 
638
  @lru_cache(maxsize=100)
639
  def cached_search(query):
640
  """Cache frequent searches"""
641
+ return tavily_search(query)
642
 
643
  def handle_api_failure(error_type, fallback_strategy="retry"):
644
  """Handle different types of API failures gracefully"""
 
649
  """Export conversation in various formats"""
650
  if not chat_history:
651
  return "No conversation to export"
652
+
653
  if export_format == "JSON":
654
  # Filter out system messages for export
655
  exportable_history = [msg for msg in chat_history if msg[0] != "system"]
 
685
  else:
686
  return scrape_web_page(url)
687
 
688
+ def respond(message, chat_history, model_choice, max_tokens, temperature, top_p,
689
+ creativity, precision, system_prompt, use_web_search, research_mode, theme):
690
  """Main response handler with conversation history"""
691
  if not message:
692
  yield "", chat_history, "", gr.update(choices=[], visible=False), "", "💬 Ready for your query"
 
762
  bibliography = generate_bibliography(search_result)
763
  analysis_status = "📚 Comprehensive research completed"
764
  else:
765
+ search_result = tavily_search(message)
766
  bibliography = ""
767
  analysis_status = "📊 Search results retrieved"
768
 
 
813
  # Break infinite loops
814
  if is_looping_content(full_response):
815
  # Force search instead of looping
816
+ search_result = tavily_search(message)
817
  follow_ups = generate_follow_up_questions(search_result)
818
  analysis_status = "⚠️ Loop detected - performing search instead"
819
  # Convert back to Gradio format
 
829
  # Check for tool calls after completion or break loops
830
  if is_looping_content(full_response):
831
  # Force search for looping content
832
+ search_result = tavily_search(message)
833
  follow_ups = generate_follow_up_questions(search_result)
834
  analysis_status = "⚠️ Loop detected - performing search instead"
835
  # Convert back to Gradio format
 
903
  clear = gr.Button("Clear")
904
  theme_toggle = gr.Radio(choices=["Light", "Dark"], value="Light", label="Theme")
905
  feedback_radio = gr.Radio(
906
+ choices=["👍 Helpful", "👎 Not Helpful", "🔄 Needs Improvement"],
907
  label="Rate Last Response"
908
  )
909
 
910
  with gr.Row():
911
  with gr.Column():
912
  follow_up_questions = gr.Radio(
913
+ choices=[],
914
  label="Suggested Follow-up Questions",
915
  visible=False
916
  )