Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -8,125 +8,694 @@ from urllib.parse import urlparse
|
|
| 8 |
import re
|
| 9 |
import json
|
| 10 |
from typing import List, Dict, Any
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
# Validate Gemini API key
|
| 13 |
def validate_api_key(api_key: str) -> tuple[bool, str]:
|
| 14 |
"""Validate if the Gemini API key is working"""
|
| 15 |
if not api_key or not api_key.strip():
|
| 16 |
-
return False, "API key is empty. Please enter a valid Gemini API key."
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
try:
|
| 22 |
# Test the API key with a simple request
|
| 23 |
-
genai.configure(api_key=api_key
|
| 24 |
model = genai.GenerativeModel('gemini-2.0-flash')
|
| 25 |
-
|
| 26 |
-
# Try a minimal test generation
|
| 27 |
-
response = model.generate_content("
|
| 28 |
-
return True, "API key is valid
|
| 29 |
-
|
| 30 |
except Exception as e:
|
| 31 |
error_msg = str(e).lower()
|
|
|
|
|
|
|
| 32 |
if "api key not valid" in error_msg or "api_key_invalid" in error_msg:
|
| 33 |
-
return False, "Invalid API key. Please check your Gemini API key and try again.
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
else:
|
| 39 |
-
return False, f"API key validation failed: {str(e)}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
|
| 41 |
-
# Search the web for relevant information using DuckDuckGo
|
| 42 |
-
def web_search(query: str, max_results: int =
|
| 43 |
-
"""
|
| 44 |
try:
|
| 45 |
with DDGS() as ddgs:
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
except Exception as e:
|
| 54 |
print(f"Search error: {e}")
|
| 55 |
-
#
|
| 56 |
try:
|
| 57 |
with DDGS() as ddgs:
|
| 58 |
results = list(ddgs.text(query, max_results=min(max_results, 5)))
|
|
|
|
| 59 |
return results
|
| 60 |
except Exception as e2:
|
| 61 |
-
print(f"
|
| 62 |
return []
|
| 63 |
|
| 64 |
-
# Fetch and extract content from a URL
|
| 65 |
def fetch_url_content(url: str) -> str:
|
| 66 |
-
"""Fetch content from a URL and extract meaningful text"""
|
| 67 |
try:
|
| 68 |
headers = {
|
| 69 |
-
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
}
|
| 71 |
-
|
|
|
|
|
|
|
| 72 |
response.raise_for_status()
|
| 73 |
|
| 74 |
soup = BeautifulSoup(response.content, 'html.parser')
|
| 75 |
|
| 76 |
# Remove unwanted elements
|
| 77 |
-
for element in soup(['script', 'style', 'nav', 'footer', 'header', 'aside']):
|
| 78 |
element.decompose()
|
| 79 |
|
| 80 |
-
#
|
| 81 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
|
| 83 |
-
# Clean up text
|
| 84 |
lines = (line.strip() for line in text.splitlines())
|
| 85 |
chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
|
| 86 |
-
text = ' '.join(chunk for chunk in chunks if chunk)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
|
| 88 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
except Exception as e:
|
| 90 |
-
print(f"
|
| 91 |
return ""
|
| 92 |
|
| 93 |
-
# Research function using web search and content extraction
|
| 94 |
-
def perform_research(query: str, max_sources: int =
|
| 95 |
-
"""Perform research by searching and extracting content from multiple sources"""
|
| 96 |
-
print(f"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
|
| 98 |
-
# Search for relevant sources
|
| 99 |
-
search_results = web_search(query, max_results=max_sources*
|
|
|
|
| 100 |
|
| 101 |
sources = []
|
| 102 |
content_chunks = []
|
|
|
|
|
|
|
| 103 |
|
| 104 |
-
for i, result in enumerate(search_results
|
| 105 |
-
|
| 106 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
|
| 116 |
-
#
|
| 117 |
-
time.sleep(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
|
| 119 |
research_context = "\n".join(content_chunks)
|
| 120 |
|
|
|
|
|
|
|
|
|
|
| 121 |
return {
|
| 122 |
'sources': sources,
|
| 123 |
'research_context': research_context,
|
| 124 |
-
'query': query
|
|
|
|
|
|
|
|
|
|
| 125 |
}
|
| 126 |
|
| 127 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 128 |
def generate_research_report(research_data: Dict[str, Any], gemini_api_key: str) -> str:
|
| 129 |
-
"""Generate a comprehensive research report using Gemini"""
|
| 130 |
if not gemini_api_key:
|
| 131 |
return "❌ Gemini API key is required to generate the report."
|
| 132 |
|
|
@@ -139,157 +708,730 @@ def generate_research_report(research_data: Dict[str, Any], gemini_api_key: str)
|
|
| 139 |
# Initialize Gemini (already configured in validation)
|
| 140 |
model = genai.GenerativeModel('gemini-2.0-flash')
|
| 141 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
prompt = f"""
|
| 143 |
-
RESEARCH
|
|
|
|
|
|
|
|
|
|
| 144 |
|
| 145 |
-
RESEARCH
|
| 146 |
{research_data['research_context']}
|
| 147 |
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 155 |
|
| 156 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 157 |
"""
|
| 158 |
|
| 159 |
response = model.generate_content(prompt)
|
| 160 |
return response.text
|
| 161 |
except Exception as e:
|
| 162 |
error_msg = str(e).lower()
|
|
|
|
|
|
|
| 163 |
if "api key not valid" in error_msg or "api_key_invalid" in error_msg:
|
| 164 |
-
return "❌ Invalid API key
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 169 |
else:
|
| 170 |
-
return f"❌ Error generating report: {str(e)}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 171 |
|
| 172 |
# Main research function
|
| 173 |
-
def run_research(topic: str, gemini_api_key: str):
|
| 174 |
"""Run the complete research process"""
|
| 175 |
if not gemini_api_key.strip():
|
| 176 |
-
return "❌ Please enter your Gemini API key.", None, gr.update(visible=False)
|
| 177 |
|
| 178 |
if not topic.strip():
|
| 179 |
-
return "❌ Please enter a research topic.", None, gr.update(visible=False)
|
| 180 |
|
| 181 |
# First validate the API key
|
| 182 |
is_valid, validation_message = validate_api_key(gemini_api_key)
|
| 183 |
if not is_valid:
|
| 184 |
-
return f"❌ {validation_message}", None, gr.update(visible=False)
|
| 185 |
|
| 186 |
try:
|
| 187 |
# Perform research
|
|
|
|
| 188 |
research_data = perform_research(topic)
|
| 189 |
|
| 190 |
if not research_data['sources']:
|
| 191 |
-
return "❌ No relevant sources found. Please try a different search term.", None, gr.update(visible=False)
|
|
|
|
|
|
|
| 192 |
|
| 193 |
# Generate report
|
| 194 |
report = generate_research_report(research_data, gemini_api_key)
|
| 195 |
|
| 196 |
-
#
|
| 197 |
-
|
|
|
|
| 198 |
|
| 199 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 200 |
|
| 201 |
except Exception as e:
|
| 202 |
-
|
| 203 |
-
|
|
|
|
| 204 |
|
| 205 |
-
# Gradio interface
|
| 206 |
def create_interface():
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
#
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 222 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 223 |
with gr.Row():
|
| 224 |
-
with gr.Column(
|
| 225 |
-
gr.
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 232 |
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 240 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 241 |
with gr.Column(scale=2):
|
| 242 |
research_topic = gr.Textbox(
|
| 243 |
-
label="Research Topic",
|
| 244 |
-
placeholder="e.g., Latest developments in
|
| 245 |
-
lines=
|
|
|
|
| 246 |
)
|
| 247 |
|
| 248 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 249 |
|
| 250 |
output = gr.Markdown(
|
| 251 |
-
|
| 252 |
-
|
|
|
|
|
|
|
| 253 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 254 |
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 259 |
|
| 260 |
-
#
|
| 261 |
-
def
|
| 262 |
if not api_key:
|
| 263 |
-
return gr.update(
|
|
|
|
|
|
|
|
|
|
| 264 |
|
| 265 |
is_valid, message = validate_api_key(api_key)
|
| 266 |
if is_valid:
|
| 267 |
-
return gr.update(
|
|
|
|
|
|
|
|
|
|
| 268 |
else:
|
| 269 |
-
return gr.update(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 270 |
|
| 271 |
-
#
|
| 272 |
validate_btn.click(
|
| 273 |
-
fn=
|
| 274 |
inputs=[gemini_key],
|
| 275 |
-
outputs=[validation_output
|
| 276 |
)
|
| 277 |
|
| 278 |
-
# Set up the research button
|
| 279 |
research_btn.click(
|
| 280 |
fn=run_research,
|
| 281 |
inputs=[research_topic, gemini_key],
|
| 282 |
-
outputs=[output,
|
| 283 |
)
|
| 284 |
|
| 285 |
-
#
|
| 286 |
-
def
|
| 287 |
-
|
|
|
|
|
|
|
| 288 |
|
| 289 |
-
|
| 290 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 291 |
inputs=[output],
|
| 292 |
-
outputs=[
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 293 |
)
|
| 294 |
|
| 295 |
return demo
|
|
|
|
| 8 |
import re
|
| 9 |
import json
|
| 10 |
from typing import List, Dict, Any
|
| 11 |
+
from datetime import datetime
|
| 12 |
+
import os
|
| 13 |
+
import tempfile
|
| 14 |
+
from reportlab.lib.pagesizes import letter, A4
|
| 15 |
+
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak, Table, TableStyle
|
| 16 |
+
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
|
| 17 |
+
from reportlab.lib.units import inch
|
| 18 |
+
from reportlab.lib import colors
|
| 19 |
+
from reportlab.lib.enums import TA_CENTER, TA_LEFT, TA_RIGHT
|
| 20 |
+
import markdown
|
| 21 |
+
|
| 22 |
+
# Application Constants
# Branding strings; all three are rendered into the generated PDF
# (title, subtitle, and footer) by create_pdf_report.
APP_NAME = "DeepResearchAgent-AI"  # shown as the PDF title and in the footer line
APP_VERSION = "v2.0"  # embedded in the PDF footer next to APP_NAME
APP_DESCRIPTION = "Advanced AI-Powered Research Assistant"  # PDF subtitle text
|
| 26 |
+
|
| 27 |
+
# Enhanced topic detection and search helper functions
|
| 28 |
+
def detect_topic_category(query: str) -> str:
    """Detect the category of research topic for specialized search strategies.

    The query is lower-cased and matched by substring against each category's
    keyword list; the FIRST category with any hit wins, so the order of the
    pairs below is significant (e.g. 'latest ai news' is classified as
    current_affairs, not technology). Returns 'general' when nothing matches.
    """
    # Ordered (category, keywords) pairs — priority order of the original chain.
    ordered_categories = [
        ('politics', ['politics', 'political', 'government', 'policy', 'election', 'democracy', 'parliament', 'congress', 'senate', 'president', 'minister', 'geopolitics', 'diplomacy', 'foreign policy', 'international relations']),
        ('history', ['history', 'historical', 'ancient', 'medieval', 'world war', 'civilization', 'empire', 'dynasty', 'revolution', 'century', 'era', 'timeline', 'past', 'heritage']),
        ('geography', ['geography', 'geographical', 'country', 'continent', 'ocean', 'mountain', 'river', 'climate', 'population', 'capital', 'border', 'region', 'territory', 'map']),
        ('current_affairs', ['current', 'news', 'today', 'recent', 'latest', 'breaking', 'update', 'happening', '2024', '2025', 'this year', 'now']),
        ('technology', ['technology', 'tech', 'ai', 'artificial intelligence', 'machine learning', 'software', 'hardware', 'computer', 'digital', 'programming', 'coding', 'algorithm', 'data science', 'cybersecurity']),
        ('war', ['war', 'warfare', 'conflict', 'battle', 'military', 'army', 'defense', 'weapon', 'strategy', 'combat', 'invasion', 'occupation', 'siege']),
        ('economics', ['economy', 'economic', 'finance', 'financial', 'market', 'trade', 'business', 'industry', 'company', 'corporation', 'gdp', 'inflation', 'recession']),
        ('science', ['science', 'scientific', 'research', 'study', 'experiment', 'discovery', 'innovation', 'physics', 'chemistry', 'biology', 'medicine', 'health']),
    ]

    lowered = query.lower()
    for category, keywords in ordered_categories:
        if any(keyword in lowered for keyword in keywords):
            return category
    return 'general'
|
| 59 |
+
|
| 60 |
+
def get_specialized_domains(topic_type: str) -> List[str]:
    """Get specialized domains based on topic category.

    Returns the curated source-domain list for *topic_type*; any unknown
    category falls back to the 'general' list.
    """
    general_domains = ['wikipedia.org', 'britannica.com', 'reuters.com', 'bbc.com', 'cnn.com']
    domains_by_topic = {
        'politics': ['reuters.com', 'bbc.com', 'cnn.com', 'politico.com', 'foreignaffairs.com', 'cfr.org', 'brookings.edu', 'csis.org'],
        'history': ['britannica.com', 'history.com', 'nationalgeographic.com', 'smithsonianmag.com', 'historynet.com', 'worldhistory.org'],
        'geography': ['nationalgeographic.com', 'worldatlas.com', 'britannica.com', 'cia.gov', 'worldbank.org', 'un.org'],
        'current_affairs': ['reuters.com', 'bbc.com', 'cnn.com', 'ap.org', 'npr.org', 'aljazeera.com', 'theguardian.com', 'nytimes.com'],
        'technology': ['techcrunch.com', 'wired.com', 'ars-technica.com', 'ieee.org', 'nature.com', 'sciencemag.org', 'mit.edu', 'stanford.edu'],
        'war': ['janes.com', 'defensenews.com', 'militarytimes.com', 'csis.org', 'rand.org', 'stratfor.com'],
        'economics': ['reuters.com', 'bloomberg.com', 'economist.com', 'ft.com', 'worldbank.org', 'imf.org', 'federalreserve.gov'],
        'science': ['nature.com', 'sciencemag.org', 'scientificamerican.com', 'newscientist.com', 'pnas.org', 'cell.com'],
        'general': general_domains,
    }
    return domains_by_topic.get(topic_type, general_domains)
|
| 74 |
+
|
| 75 |
+
def get_topic_keywords(query: str, topic_type: str) -> List[str]:
    """Get enhanced keywords based on topic category.

    NOTE(review): `query` is accepted but not consulted in the current
    implementation — only `topic_type` drives the result. It is kept for
    interface compatibility with callers.
    """
    fallback = ['information', 'facts', 'comprehensive', 'detailed', 'overview', 'guide', 'explanation', 'analysis', 'summary', 'background']
    keywords_by_topic = {
        'politics': ['analysis', 'policy', 'government', 'official', 'statement', 'report', 'briefing', 'summit', 'debate', 'legislation'],
        'history': ['timeline', 'chronology', 'facts', 'documented', 'archive', 'primary source', 'historian', 'evidence', 'analysis', 'context'],
        'geography': ['facts', 'statistics', 'data', 'demographic', 'topography', 'atlas', 'survey', 'official', 'census', 'coordinates'],
        'current_affairs': ['breaking', 'latest', 'update', 'developing', 'live', 'recent', 'today', 'headlines', 'news', 'report'],
        'technology': ['innovation', 'breakthrough', 'development', 'advancement', 'research', 'cutting-edge', 'emerging', 'trend', 'future', 'application'],
        'war': ['analysis', 'strategy', 'tactics', 'intelligence', 'assessment', 'report', 'conflict', 'situation', 'update', 'briefing'],
        'economics': ['analysis', 'forecast', 'data', 'statistics', 'trend', 'market', 'report', 'outlook', 'indicator', 'growth'],
        'science': ['research', 'study', 'discovery', 'breakthrough', 'publication', 'peer-reviewed', 'journal', 'findings', 'methodology', 'evidence'],
        'general': fallback,
    }
    return keywords_by_topic.get(topic_type, fallback)
|
| 89 |
+
|
| 90 |
+
def get_priority_domains_for_topic(topic_type: str) -> List[str]:
    """Get priority domains for result ranking based on topic.

    Unknown categories resolve to the 'general' priority list.
    """
    default_priority = ['wikipedia.org', 'britannica.com', 'reuters.com', 'bbc.com', 'cnn.com', 'nationalgeographic.com']
    priority_by_topic = {
        'politics': ['reuters.com', 'bbc.com', 'cnn.com', 'politico.com', 'foreignaffairs.com', 'cfr.org', 'brookings.edu', 'apnews.com'],
        'history': ['britannica.com', 'history.com', 'nationalgeographic.com', 'smithsonianmag.com', 'worldhistory.org', 'historynet.com'],
        'geography': ['nationalgeographic.com', 'worldatlas.com', 'britannica.com', 'cia.gov', 'worldbank.org', 'un.org'],
        'current_affairs': ['reuters.com', 'bbc.com', 'cnn.com', 'ap.org', 'npr.org', 'aljazeera.com', 'theguardian.com', 'nytimes.com'],
        'technology': ['techcrunch.com', 'wired.com', 'ars-technica.com', 'ieee.org', 'nature.com', 'mit.edu', 'stanford.edu', 'acm.org'],
        'war': ['janes.com', 'defensenews.com', 'csis.org', 'rand.org', 'stratfor.com', 'cfr.org'],
        'economics': ['reuters.com', 'bloomberg.com', 'economist.com', 'ft.com', 'worldbank.org', 'imf.org', 'federalreserve.gov'],
        'science': ['nature.com', 'sciencemag.org', 'scientificamerican.com', 'newscientist.com', 'pnas.org', 'cell.com'],
        'general': default_priority,
    }
    return priority_by_topic.get(topic_type, default_priority)
|
| 104 |
+
|
| 105 |
+
# Sanitize filename for safe file creation
def sanitize_filename(filename: str) -> str:
    """Sanitize filename to remove invalid characters for Windows/Unix systems.

    Replaces characters illegal in Windows filenames (a superset of the
    Unix-illegal set) with underscores, collapses underscore runs, caps the
    stem length, and guarantees a non-empty result ending in '.md'.

    Fix over the previous version: the 200-char truncation now applies to the
    STEM, before the '.md' suffix is appended. Previously the cap ran first
    and '.md' was appended afterwards, so the result could exceed the limit
    (203 chars), and a long name already ending in '.md' could be cut
    mid-extension and then have '.md' re-appended onto the mangled stem.
    """
    # Remove or replace invalid characters
    invalid_chars = '<>:"/\\|?*'
    for char in invalid_chars:
        filename = filename.replace(char, '_')

    # Remove multiple consecutive underscores and trim
    filename = re.sub(r'_+', '_', filename)
    filename = filename.strip('_')

    # Work on the stem so the length cap accounts for the extension.
    if filename.endswith('.md'):
        filename = filename[:-3]

    # Limit length to prevent issues (final name is stem + '.md')
    if len(filename) > 200:
        filename = filename[:200]

    # Ensure it's not empty
    if not filename:
        filename = "research_report"

    return filename + '.md'
|
| 129 |
+
|
| 130 |
+
# PDF Generation Function
def create_pdf_report(content: str, topic: str, sources: List[Dict], filename: str) -> str:
    """Create a professional PDF report from markdown content.

    Builds a reportlab/platypus document in the system temp directory:
    a branded cover section (APP_NAME/APP_DESCRIPTION, topic, timestamp,
    source count), the report body rendered line-by-line from a small
    markdown subset, a sources appendix (first 10), and a footer.

    Args:
        content: Markdown-ish report text; only '#'/'##'/'###' headers,
            whole-line '**bold**', '-'/'*' bullets, '1. '-'5. ' list lines,
            and inline **bold**/*italic* are recognized — everything else is
            emitted as a plain paragraph.
        topic: Research topic shown on the cover section.
        sources: Dicts read via .get('title') and .get('url').
        filename: '.md' name whose extension is swapped for '.pdf'.

    Returns:
        Path to the generated PDF, or None on any failure (note: the
        annotation says str, but the except branch returns None — callers
        must handle that).
    """
    try:
        # Create temporary PDF file
        temp_dir = tempfile.gettempdir()
        pdf_path = os.path.join(temp_dir, filename.replace('.md', '.pdf'))

        # Create PDF document
        doc = SimpleDocTemplate(pdf_path, pagesize=A4, topMargin=1*inch, bottomMargin=1*inch)
        styles = getSampleStyleSheet()
        story = []

        # Custom styles
        title_style = ParagraphStyle(
            'CustomTitle',
            parent=styles['Heading1'],
            fontSize=24,
            textColor=colors.HexColor('#2C3E50'),
            spaceAfter=30,
            alignment=TA_CENTER,
            fontName='Helvetica-Bold'
        )

        subtitle_style = ParagraphStyle(
            'CustomSubtitle',
            parent=styles['Heading2'],
            fontSize=14,
            textColor=colors.HexColor('#34495E'),
            spaceAfter=20,
            alignment=TA_CENTER
        )

        header_style = ParagraphStyle(
            'CustomHeader',
            parent=styles['Heading2'],
            fontSize=16,
            textColor=colors.HexColor('#2980B9'),
            spaceAfter=12,
            spaceBefore=20,
            fontName='Helvetica-Bold'
        )

        body_style = ParagraphStyle(
            'CustomBody',
            parent=styles['Normal'],
            fontSize=11,
            textColor=colors.HexColor('#2C3E50'),
            spaceAfter=6,
            alignment=TA_LEFT,
            leading=14
        )

        # Header Section
        story.append(Paragraph(APP_NAME, title_style))
        story.append(Paragraph(APP_DESCRIPTION, subtitle_style))
        story.append(Spacer(1, 0.2*inch))

        # Add decorative line (a one-row table used only for its bottom rule)
        line_data = [['', '']]
        line_table = Table(line_data, colWidths=[5*inch])
        line_table.setStyle(TableStyle([
            ('LINEBELOW', (0,0), (-1,-1), 2, colors.HexColor('#3498DB')),
        ]))
        story.append(line_table)
        story.append(Spacer(1, 0.3*inch))

        # Research Topic
        story.append(Paragraph("Research Topic", header_style))
        story.append(Paragraph(topic, body_style))
        story.append(Spacer(1, 0.2*inch))

        # Generation Info
        # NOTE(review): datetime.now() is naive local time — confirm whether
        # UTC is wanted for hosted deployments.
        current_time = datetime.now().strftime("%B %d, %Y at %I:%M %p")
        story.append(Paragraph("Generated", header_style))
        story.append(Paragraph(f"{current_time}", body_style))
        story.append(Spacer(1, 0.2*inch))

        # Sources Summary
        if sources:
            story.append(Paragraph("Sources Analyzed", header_style))
            story.append(Paragraph(f"{len(sources)} reliable sources processed", body_style))
            story.append(Spacer(1, 0.3*inch))

        story.append(PageBreak())

        # Main Content
        story.append(Paragraph("Research Report", header_style))
        story.append(Spacer(1, 0.1*inch))

        # Process markdown content line-by-line (see docstring for the
        # recognized subset). Styles for sub-headers/bold/bullets are created
        # per matching line.
        lines = content.split('\n')
        for line in lines:
            line = line.strip()
            if not line:
                story.append(Spacer(1, 6))
                continue

            if line.startswith('# '):
                story.append(Paragraph(line[2:], header_style))
            elif line.startswith('## '):
                # '##' intentionally shares header_style with '#'
                story.append(Paragraph(line[3:], header_style))
            elif line.startswith('### '):
                header_3_style = ParagraphStyle(
                    'Header3',
                    parent=header_style,
                    fontSize=14,
                    textColor=colors.HexColor('#7F8C8D')
                )
                story.append(Paragraph(line[4:], header_3_style))
            elif line.startswith('**') and line.endswith('**'):
                bold_style = ParagraphStyle(
                    'Bold',
                    parent=body_style,
                    fontName='Helvetica-Bold'
                )
                story.append(Paragraph(line[2:-2], bold_style))
            elif line.startswith('- ') or line.startswith('* '):
                bullet_style = ParagraphStyle(
                    'Bullet',
                    parent=body_style,
                    leftIndent=20,
                    bulletIndent=10,
                    bulletText='•',
                    bulletColor=colors.HexColor('#3498DB')
                )
                story.append(Paragraph(line[2:], bullet_style))
            elif line.startswith(('1. ', '2. ', '3. ', '4. ', '5. ')):
                # Numbered list lines kept verbatim; items beyond '5.' fall
                # through to the plain-paragraph branch below.
                story.append(Paragraph(line, body_style))
            else:
                # Clean basic markdown formatting (inline bold/italic -> HTML
                # tags that reportlab's Paragraph understands)
                line = re.sub(r'\*\*(.*?)\*\*', r'<b>\1</b>', line)
                line = re.sub(r'\*(.*?)\*', r'<i>\1</i>', line)
                story.append(Paragraph(line, body_style))

        # Footer section
        story.append(PageBreak())
        story.append(Paragraph("Sources", header_style))

        if sources:
            for i, source in enumerate(sources[:10], 1):  # Limit to 10 sources
                source_style = ParagraphStyle(
                    'Source',
                    parent=body_style,
                    fontSize=10,
                    leftIndent=10,
                    spaceAfter=8
                )
                title = source.get('title', 'No Title')[:100]
                url = source.get('url', '')
                story.append(Paragraph(f"{i}. {title}", source_style))
                if url:
                    url_style = ParagraphStyle(
                        'URL',
                        parent=source_style,
                        fontSize=9,
                        textColor=colors.HexColor('#3498DB'),
                        leftIndent=20
                    )
                    story.append(Paragraph(url, url_style))

        # Footer
        story.append(Spacer(1, 0.5*inch))
        footer_style = ParagraphStyle(
            'Footer',
            parent=styles['Normal'],
            fontSize=10,
            textColor=colors.HexColor('#7F8C8D'),
            alignment=TA_CENTER
        )
        story.append(Paragraph(f"Generated by {APP_NAME} {APP_VERSION} | Advanced AI Research Assistant", footer_style))

        # Build PDF
        doc.build(story)
        return pdf_path

    except Exception as e:
        # Best-effort: report generation failures are logged, not raised.
        print(f"PDF generation error: {e}")
        return None
|
| 309 |
|
| 310 |
# Validate Gemini API key
|
| 311 |
def validate_api_key(api_key: str) -> tuple[bool, str]:
    """Validate if the Gemini API key is working.

    Cheap local format checks run first (empty, too short, invalid
    characters) so we only spend a network round-trip on keys that at
    least look plausible.

    Args:
        api_key: Candidate Gemini API key; surrounding whitespace is ignored.

    Returns:
        (is_valid, message): ``is_valid`` is True when the key works and
        ``message`` is a user-facing (Markdown) explanation either way.
    """
    if not api_key or not api_key.strip():
        return False, "❌ API key is empty. Please enter a valid Gemini API key."

    api_key = api_key.strip()

    # Basic format checks — fail fast before any network call.
    if len(api_key) < 20:
        return False, "❌ API key seems too short. Please check that you copied the complete key."

    # Gemini keys are alphanumeric apart from '-' and '_'.
    if not api_key.replace('-', '').replace('_', '').isalnum():
        return False, "❌ API key contains invalid characters. Please check your key format."

    try:
        # Test the API key with a minimal, cheap generation request.
        genai.configure(api_key=api_key)
        model = genai.GenerativeModel('gemini-2.0-flash')

        # Result is discarded: we only care whether the call succeeds.
        model.generate_content("Test", generation_config={"max_output_tokens": 10})
        return True, "✅ API key is valid and working!"

    except Exception as e:
        error_msg = str(e).lower()
        print(f"API Key validation error: {e}")  # Debug info

        if "api key not valid" in error_msg or "api_key_invalid" in error_msg:
            return False, """❌ Invalid API key. Please check your Gemini API key and try again.

**Common issues:**
• Make sure you copied the ENTIRE key from https://aistudio.google.com/
• Check for extra spaces at the beginning or end
• Try refreshing the page and copying the key again
• Make sure you're using the correct API key (not mixing up with other services)"""

        elif "quota" in error_msg or "limit" in error_msg:
            return False, """❌ API quota exceeded. Your Gemini API usage limit has been reached.

**Solutions:**
• Check your usage at https://aistudio.google.com/
• Wait for the quota to reset (usually monthly)
• Consider upgrading your plan if needed"""

        elif "permission" in error_msg or "forbidden" in error_msg:
            return False, """❌ API key doesn't have required permissions.

**Solutions:**
• Regenerate your API key at https://aistudio.google.com/
• Make sure the API key is enabled for Gemini API
• Check if your Google Cloud project has the necessary permissions"""

        elif "network" in error_msg or "connection" in error_msg or "timeout" in error_msg:
            return False, """❌ Network error. Please check your internet connection and try again.

**Troubleshooting:**
• Check your internet connection
• Try again in a few minutes
• Disable VPN if you're using one
• Check if Google services are accessible in your region"""

        elif "model" in error_msg:
            return False, """❌ Model not available. The specified Gemini model might not be available.

**Solutions:**
• Try using a different model (like 'gemini-pro')
• Check Gemini API availability at https://status.cloud.google.com/"""

        else:
            return False, f"""❌ API key validation failed: {str(e)}

**Debugging tips:**
• Make sure you're using a valid Gemini API key from https://aistudio.google.com/
• Try creating a new API key if the current one doesn't work
• Check the Google Cloud Console for any billing or permission issues"""
| 386 |
|
| 387 |
+
# Search the web for relevant information using DuckDuckGo with enhanced targeting for diverse topics
|
| 388 |
+
def web_search(query: str, max_results: int = 15) -> List[Dict[str, str]]:
|
| 389 |
+
"""Enhanced search for diverse topics: Politics, History, Technology, Current Affairs, etc."""
|
| 390 |
try:
|
| 391 |
with DDGS() as ddgs:
|
| 392 |
+
all_results = []
|
| 393 |
+
|
| 394 |
+
# Detect topic category for specialized search
|
| 395 |
+
topic_type = detect_topic_category(query.lower())
|
| 396 |
+
print(f"Detected topic category: {topic_type}")
|
| 397 |
+
|
| 398 |
+
# Strategy 1: Exact phrase search
|
| 399 |
+
try:
|
| 400 |
+
exact_results = list(ddgs.text(f'"{query}"', max_results=max_results//3))
|
| 401 |
+
all_results.extend(exact_results)
|
| 402 |
+
print(f"Found {len(exact_results)} results from exact search")
|
| 403 |
+
except Exception as e:
|
| 404 |
+
print(f"Exact search error: {e}")
|
| 405 |
+
|
| 406 |
+
# Strategy 2: Topic-specific domain searches
|
| 407 |
+
specialized_domains = get_specialized_domains(topic_type)
|
| 408 |
+
for domain in specialized_domains:
|
| 409 |
+
try:
|
| 410 |
+
domain_results = list(ddgs.text(f'{query} site:{domain}', max_results=2))
|
| 411 |
+
all_results.extend(domain_results)
|
| 412 |
+
if len(all_results) >= max_results:
|
| 413 |
+
break
|
| 414 |
+
except Exception as e:
|
| 415 |
+
print(f"Domain search error for {domain}: {e}")
|
| 416 |
+
continue
|
| 417 |
+
|
| 418 |
+
# Strategy 3: Enhanced keyword searches based on topic
|
| 419 |
+
enhanced_keywords = get_topic_keywords(query, topic_type)
|
| 420 |
+
for keyword in enhanced_keywords[:5]:
|
| 421 |
+
try:
|
| 422 |
+
keyword_results = list(ddgs.text(f'{query} {keyword}', max_results=2))
|
| 423 |
+
all_results.extend(keyword_results)
|
| 424 |
+
if len(all_results) >= max_results:
|
| 425 |
+
break
|
| 426 |
+
except Exception as e:
|
| 427 |
+
print(f"Keyword search error for {keyword}: {e}")
|
| 428 |
+
continue
|
| 429 |
+
|
| 430 |
+
# Strategy 4: Time-based searches for current affairs
|
| 431 |
+
if topic_type in ['current_affairs', 'politics', 'technology', 'news']:
|
| 432 |
+
time_modifiers = ['2024', '2025', 'latest', 'recent', 'current', 'today', 'this year']
|
| 433 |
+
for modifier in time_modifiers[:3]:
|
| 434 |
+
try:
|
| 435 |
+
time_results = list(ddgs.text(f'{query} {modifier}', max_results=2))
|
| 436 |
+
all_results.extend(time_results)
|
| 437 |
+
if len(all_results) >= max_results:
|
| 438 |
+
break
|
| 439 |
+
except Exception as e:
|
| 440 |
+
print(f"Time-based search error for {modifier}: {e}")
|
| 441 |
+
continue
|
| 442 |
+
|
| 443 |
+
# Strategy 5: Academic and authoritative sources
|
| 444 |
+
academic_modifiers = ['analysis', 'research', 'study', 'report', 'comprehensive', 'detailed']
|
| 445 |
+
for modifier in academic_modifiers[:3]:
|
| 446 |
+
try:
|
| 447 |
+
academic_results = list(ddgs.text(f'{query} {modifier}', max_results=2))
|
| 448 |
+
all_results.extend(academic_results)
|
| 449 |
+
if len(all_results) >= max_results:
|
| 450 |
+
break
|
| 451 |
+
except Exception as e:
|
| 452 |
+
print(f"Academic search error for {modifier}: {e}")
|
| 453 |
+
continue
|
| 454 |
+
|
| 455 |
+
# Strategy 6: Fallback comprehensive search
|
| 456 |
+
if len(all_results) < 8:
|
| 457 |
+
try:
|
| 458 |
+
general_results = list(ddgs.text(query, max_results=max_results//2))
|
| 459 |
+
all_results.extend(general_results)
|
| 460 |
+
except Exception as e:
|
| 461 |
+
print(f"General search error: {e}")
|
| 462 |
+
|
| 463 |
+
# Remove duplicates and prioritize authoritative domains
|
| 464 |
+
seen_urls = set()
|
| 465 |
+
unique_results = []
|
| 466 |
+
priority_domains = get_priority_domains_for_topic(topic_type)
|
| 467 |
+
|
| 468 |
+
# First, add results from priority domains
|
| 469 |
+
for result in all_results:
|
| 470 |
+
url = result.get('href', '')
|
| 471 |
+
if url not in seen_urls and any(domain in url for domain in priority_domains):
|
| 472 |
+
seen_urls.add(url)
|
| 473 |
+
unique_results.append(result)
|
| 474 |
+
if len(unique_results) >= max_results:
|
| 475 |
+
break
|
| 476 |
+
|
| 477 |
+
# Then add other unique results
|
| 478 |
+
for result in all_results:
|
| 479 |
+
url = result.get('href', '')
|
| 480 |
+
if url not in seen_urls:
|
| 481 |
+
seen_urls.add(url)
|
| 482 |
+
unique_results.append(result)
|
| 483 |
+
if len(unique_results) >= max_results:
|
| 484 |
+
break
|
| 485 |
+
|
| 486 |
+
print(f"Total unique results found: {len(unique_results)}")
|
| 487 |
+
return unique_results[:max_results]
|
| 488 |
+
|
| 489 |
except Exception as e:
|
| 490 |
print(f"Search error: {e}")
|
| 491 |
+
# Final fallback - simple search
|
| 492 |
try:
|
| 493 |
with DDGS() as ddgs:
|
| 494 |
results = list(ddgs.text(query, max_results=min(max_results, 5)))
|
| 495 |
+
print(f"Fallback search found: {len(results)} results")
|
| 496 |
return results
|
| 497 |
except Exception as e2:
|
| 498 |
+
print(f"Fallback search error: {e2}")
|
| 499 |
return []
|
| 500 |
|
| 501 |
+
# Fetch and extract content from a URL with better error handling
|
| 502 |
def fetch_url_content(url: str) -> str:
|
| 503 |
+
"""Fetch content from a URL and extract meaningful text with enhanced error handling"""
|
| 504 |
try:
|
| 505 |
headers = {
|
| 506 |
+
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
|
| 507 |
+
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
|
| 508 |
+
'Accept-Language': 'en-US,en;q=0.5',
|
| 509 |
+
'Accept-Encoding': 'gzip, deflate',
|
| 510 |
+
'Connection': 'keep-alive',
|
| 511 |
+
'Upgrade-Insecure-Requests': '1',
|
| 512 |
}
|
| 513 |
+
|
| 514 |
+
# Increase timeout and add retries
|
| 515 |
+
response = requests.get(url, headers=headers, timeout=15, allow_redirects=True)
|
| 516 |
response.raise_for_status()
|
| 517 |
|
| 518 |
soup = BeautifulSoup(response.content, 'html.parser')
|
| 519 |
|
| 520 |
# Remove unwanted elements
|
| 521 |
+
for element in soup(['script', 'style', 'nav', 'footer', 'header', 'aside', 'iframe', 'noscript']):
|
| 522 |
element.decompose()
|
| 523 |
|
| 524 |
+
# Try to get the main content area first
|
| 525 |
+
main_content = soup.find('main') or soup.find('article') or soup.find('div', class_=['content', 'main', 'body'])
|
| 526 |
+
if main_content:
|
| 527 |
+
text = main_content.get_text()
|
| 528 |
+
else:
|
| 529 |
+
text = soup.get_text()
|
| 530 |
|
| 531 |
+
# Clean up text more thoroughly
|
| 532 |
lines = (line.strip() for line in text.splitlines())
|
| 533 |
chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
|
| 534 |
+
text = ' '.join(chunk for chunk in chunks if chunk and len(chunk) > 2)
|
| 535 |
+
|
| 536 |
+
# Remove excessive whitespace and clean up
|
| 537 |
+
text = re.sub(r'\s+', ' ', text)
|
| 538 |
+
text = text.strip()
|
| 539 |
|
| 540 |
+
# Return more content for better analysis - increased from 5000 to 8000
|
| 541 |
+
return text[:8000] if text else ""
|
| 542 |
+
|
| 543 |
+
except requests.exceptions.Timeout:
|
| 544 |
+
print(f"Timeout error for {url} - trying with shorter timeout")
|
| 545 |
+
try:
|
| 546 |
+
# Retry with shorter timeout
|
| 547 |
+
response = requests.get(url, headers=headers, timeout=8, allow_redirects=True)
|
| 548 |
+
response.raise_for_status()
|
| 549 |
+
soup = BeautifulSoup(response.content, 'html.parser')
|
| 550 |
+
text = soup.get_text()
|
| 551 |
+
text = re.sub(r'\s+', ' ', text.strip())
|
| 552 |
+
return text[:5000] if text else ""
|
| 553 |
+
except Exception as retry_error:
|
| 554 |
+
print(f"Retry failed for {url}: {retry_error}")
|
| 555 |
+
return ""
|
| 556 |
+
|
| 557 |
+
except requests.exceptions.RequestException as e:
|
| 558 |
+
print(f"Request error fetching {url}: {e}")
|
| 559 |
+
return ""
|
| 560 |
except Exception as e:
|
| 561 |
+
print(f"Unexpected error fetching {url}: {e}")
|
| 562 |
return ""
|
| 563 |
|
| 564 |
+
# Research function using web search and content extraction with enhanced analysis for diverse topics
|
| 565 |
+
def perform_research(query: str, max_sources: int = 12) -> Dict[str, Any]:
|
| 566 |
+
"""Perform comprehensive research by searching and extracting content from multiple sources"""
|
| 567 |
+
print(f"🔍 Starting comprehensive research for: {query}")
|
| 568 |
+
|
| 569 |
+
# Detect topic category for better research strategy
|
| 570 |
+
topic_type = detect_topic_category(query.lower())
|
| 571 |
+
print(f"📊 Detected topic category: {topic_type}")
|
| 572 |
|
| 573 |
+
# Search for relevant sources with more results to ensure we get at least 10 quality sources
|
| 574 |
+
search_results = web_search(query, max_results=max_sources*4) # Get more results initially
|
| 575 |
+
print(f"📊 Found {len(search_results)} potential sources")
|
| 576 |
|
| 577 |
sources = []
|
| 578 |
content_chunks = []
|
| 579 |
+
successful_fetches = 0
|
| 580 |
+
failed_fetches = 0
|
| 581 |
|
| 582 |
+
for i, result in enumerate(search_results):
|
| 583 |
+
if successful_fetches >= max_sources:
|
| 584 |
+
break
|
| 585 |
+
|
| 586 |
+
url = result.get('href', '')
|
| 587 |
+
title = result.get('title', 'No title')
|
| 588 |
+
|
| 589 |
+
# Skip low-quality or duplicate sources
|
| 590 |
+
if should_skip_source(url, title, sources):
|
| 591 |
+
print(f"⏭️ Skipping {url} - low quality or duplicate")
|
| 592 |
+
continue
|
| 593 |
|
| 594 |
+
print(f"🌐 Fetching content from {url}")
|
| 595 |
+
content = fetch_url_content(url)
|
| 596 |
+
|
| 597 |
+
if content and len(content) > 150: # Minimum content threshold
|
| 598 |
+
# Validate content quality for the specific topic
|
| 599 |
+
if is_relevant_content(content, query, topic_type):
|
| 600 |
+
sources.append({
|
| 601 |
+
'title': title,
|
| 602 |
+
'url': url,
|
| 603 |
+
'content': content,
|
| 604 |
+
'topic_type': topic_type
|
| 605 |
+
})
|
| 606 |
+
content_chunks.append(f"SOURCE {successful_fetches + 1} [{topic_type.upper()}]:\nTITLE: {title}\nURL: {url}\nCONTENT:\n{content}\n{'='*100}\n")
|
| 607 |
+
successful_fetches += 1
|
| 608 |
+
print(f"✅ Successfully extracted {len(content)} characters from source {successful_fetches}")
|
| 609 |
+
else:
|
| 610 |
+
print(f"⚠️ Content not relevant for {query}")
|
| 611 |
+
failed_fetches += 1
|
| 612 |
+
else:
|
| 613 |
+
print(f"⚠️ Skipped {url} - insufficient content ({len(content) if content else 0} chars)")
|
| 614 |
+
failed_fetches += 1
|
| 615 |
|
| 616 |
+
# Add small delay to be respectful
|
| 617 |
+
time.sleep(0.3)
|
| 618 |
+
|
| 619 |
+
# If we don't have enough sources, try a broader search
|
| 620 |
+
if successful_fetches < 8:
|
| 621 |
+
print(f"🔄 Only found {successful_fetches} quality sources, trying broader search...")
|
| 622 |
+
broader_results = web_search(f"{query} comprehensive analysis", max_results=15)
|
| 623 |
+
|
| 624 |
+
for result in broader_results:
|
| 625 |
+
if successful_fetches >= max_sources:
|
| 626 |
+
break
|
| 627 |
+
|
| 628 |
+
url = result.get('href', '')
|
| 629 |
+
title = result.get('title', 'No title')
|
| 630 |
+
|
| 631 |
+
if should_skip_source(url, title, sources):
|
| 632 |
+
continue
|
| 633 |
+
|
| 634 |
+
content = fetch_url_content(url)
|
| 635 |
+
if content and len(content) > 100:
|
| 636 |
+
sources.append({
|
| 637 |
+
'title': title,
|
| 638 |
+
'url': url,
|
| 639 |
+
'content': content,
|
| 640 |
+
'topic_type': 'additional'
|
| 641 |
+
})
|
| 642 |
+
content_chunks.append(f"ADDITIONAL SOURCE {successful_fetches + 1}:\nTITLE: {title}\nURL: {url}\nCONTENT:\n{content}\n{'='*100}\n")
|
| 643 |
+
successful_fetches += 1
|
| 644 |
+
print(f"✅ Additional source {successful_fetches} added")
|
| 645 |
+
|
| 646 |
+
time.sleep(0.3)
|
| 647 |
|
| 648 |
research_context = "\n".join(content_chunks)
|
| 649 |
|
| 650 |
+
print(f"📝 Research completed: {successful_fetches} sources processed, {failed_fetches} failed")
|
| 651 |
+
print(f"📊 Total content length: {len(research_context)} characters")
|
| 652 |
+
|
| 653 |
return {
|
| 654 |
'sources': sources,
|
| 655 |
'research_context': research_context,
|
| 656 |
+
'query': query,
|
| 657 |
+
'total_sources': successful_fetches,
|
| 658 |
+
'topic_type': topic_type,
|
| 659 |
+
'failed_sources': failed_fetches
|
| 660 |
}
|
| 661 |
|
| 662 |
+
def should_skip_source(url: str, title: str, existing_sources: List[Dict]) -> bool:
|
| 663 |
+
"""Check if a source should be skipped based on quality and duplication"""
|
| 664 |
+
# Skip if URL already exists
|
| 665 |
+
existing_urls = [source['url'] for source in existing_sources]
|
| 666 |
+
if url in existing_urls:
|
| 667 |
+
return True
|
| 668 |
+
|
| 669 |
+
# Skip low-quality domains
|
| 670 |
+
low_quality_domains = ['pinterest.com', 'instagram.com', 'facebook.com', 'twitter.com', 'tiktok.com', 'reddit.com']
|
| 671 |
+
if any(domain in url for domain in low_quality_domains):
|
| 672 |
+
return True
|
| 673 |
+
|
| 674 |
+
# Skip if title is too short or generic
|
| 675 |
+
if len(title) < 10 or title.lower() in ['no title', 'untitled', 'page not found']:
|
| 676 |
+
return True
|
| 677 |
+
|
| 678 |
+
return False
|
| 679 |
def is_relevant_content(content: str, query: str, topic_type: str) -> bool:
    """Judge whether extracted page text matches the query and topic.

    Relevance requires a minimum content length plus either (a) at least
    30% of the query's words appearing in the text, or (b) two or more
    topic-specific keywords appearing in the text.

    Args:
        content: Extracted page text.
        query: The user's research query.
        topic_type: Detected topic category (drives keyword selection).

    Returns:
        True when the content is considered relevant.
    """
    lowered = content.lower()
    query_terms = query.lower().split()

    # Fraction of query words found (substring match) in the content.
    hits = sum(1 for term in query_terms if term in lowered)
    word_relevance = hits / len(query_terms) if query_terms else 0

    # Count topic-specific keyword occurrences as a second signal.
    keyword_hits = 0
    for keyword in get_topic_keywords(query, topic_type):
        if keyword.lower() in lowered:
            keyword_hits += 1

    return len(content) > 200 and (word_relevance >= 0.3 or keyword_hits >= 2)
| 695 |
+
|
| 696 |
+
# Generate a research report using Gemini with enhanced topic handling
|
| 697 |
def generate_research_report(research_data: Dict[str, Any], gemini_api_key: str) -> str:
|
| 698 |
+
"""Generate a comprehensive research report using Gemini for diverse topics"""
|
| 699 |
if not gemini_api_key:
|
| 700 |
return "❌ Gemini API key is required to generate the report."
|
| 701 |
|
|
|
|
| 708 |
# Initialize Gemini (already configured in validation)
|
| 709 |
model = genai.GenerativeModel('gemini-2.0-flash')
|
| 710 |
|
| 711 |
+
topic_type = research_data.get('topic_type', 'general')
|
| 712 |
+
failed_sources = research_data.get('failed_sources', 0)
|
| 713 |
+
|
| 714 |
+
# Create topic-specific prompt
|
| 715 |
prompt = f"""
|
| 716 |
+
RESEARCH QUERY: {research_data['query']}
|
| 717 |
+
TOPIC CATEGORY: {topic_type.upper()}
|
| 718 |
+
TOTAL SOURCES ANALYZED: {research_data.get('total_sources', len(research_data['sources']))}
|
| 719 |
+
FAILED SOURCES: {failed_sources}
|
| 720 |
|
| 721 |
+
COMPREHENSIVE RESEARCH DATA FROM MULTIPLE AUTHORITATIVE SOURCES:
|
| 722 |
{research_data['research_context']}
|
| 723 |
|
| 724 |
+
INSTRUCTIONS FOR {topic_type.upper()} RESEARCH REPORT:
|
| 725 |
+
Based on the above research data, create a comprehensive, well-structured report analyzing ALL the information provided. This is a {topic_type} research topic, so focus on relevant aspects for this domain.
|
| 726 |
+
|
| 727 |
+
Your report structure should include:
|
| 728 |
+
|
| 729 |
+
1. **EXECUTIVE SUMMARY**
|
| 730 |
+
- Key findings and main points about {research_data['query']}
|
| 731 |
+
- Critical insights and takeaways
|
| 732 |
+
- Brief overview of what the research reveals
|
| 733 |
+
|
| 734 |
+
2. **DETAILED ANALYSIS**
|
| 735 |
+
- In-depth examination of all collected information
|
| 736 |
+
- Multiple perspectives and viewpoints found in sources
|
| 737 |
+
- Connections between different pieces of information
|
| 738 |
+
- Contradictions or debates if any exist
|
| 739 |
+
|
| 740 |
+
3. **BACKGROUND & CONTEXT**
|
| 741 |
+
- Historical background (if relevant)
|
| 742 |
+
- Current situation and status
|
| 743 |
+
- Relevant context that helps understand the topic
|
| 744 |
+
|
| 745 |
+
4. **KEY FINDINGS & INSIGHTS**
|
| 746 |
+
- Most important discoveries from the research
|
| 747 |
+
- Patterns and trends identified
|
| 748 |
+
- Significant facts and statistics
|
| 749 |
+
- Expert opinions and analysis
|
| 750 |
+
|
| 751 |
+
5. **CURRENT STATUS & DEVELOPMENTS**
|
| 752 |
+
- Latest information and recent developments
|
| 753 |
+
- Current state of affairs
|
| 754 |
+
- Recent changes or updates
|
| 755 |
+
|
| 756 |
+
6. **DIFFERENT PERSPECTIVES**
|
| 757 |
+
- Various viewpoints found in sources
|
| 758 |
+
- Debates and discussions around the topic
|
| 759 |
+
- Conflicting information (if any)
|
| 760 |
+
|
| 761 |
+
7. **IMPLICATIONS & SIGNIFICANCE**
|
| 762 |
+
- Why this topic matters
|
| 763 |
+
- Impact and consequences
|
| 764 |
+
- Future implications
|
| 765 |
+
|
| 766 |
+
8. **DETAILED BREAKDOWN**
|
| 767 |
+
- Specific details from each major source
|
| 768 |
+
- Technical information (if applicable)
|
| 769 |
+
- Statistics and data points
|
| 770 |
+
- Quotes and specific information
|
| 771 |
|
| 772 |
+
9. **CONCLUSIONS**
|
| 773 |
+
- Summary of what was discovered
|
| 774 |
+
- Final thoughts and analysis
|
| 775 |
+
- Gaps in information (if any)
|
| 776 |
+
|
| 777 |
+
10. **SOURCES & REFERENCES**
|
| 778 |
+
- List all sources with proper attribution
|
| 779 |
+
- Include URLs for verification
|
| 780 |
+
- Note the reliability and type of each source
|
| 781 |
+
|
| 782 |
+
FORMATTING REQUIREMENTS:
|
| 783 |
+
- Use clear Markdown formatting with headers (##), subheaders (###), and bullet points
|
| 784 |
+
- Make the content engaging, informative, and well-organized
|
| 785 |
+
- Include specific details, examples, and quotes from the sources
|
| 786 |
+
- Highlight important information with **bold text**
|
| 787 |
+
- Use bullet points for lists and key points
|
| 788 |
+
- Organize information logically and coherently
|
| 789 |
+
- If information is conflicting, present both sides
|
| 790 |
+
- If insufficient information is available for any section, clearly state what could not be determined
|
| 791 |
+
|
| 792 |
+
CONTENT REQUIREMENTS:
|
| 793 |
+
- Base your analysis ONLY on the provided source content
|
| 794 |
+
- Do not make assumptions or add information not present in the sources
|
| 795 |
+
- Include specific details and examples from multiple sources
|
| 796 |
+
- Synthesize information from all sources, don't just summarize each one separately
|
| 797 |
+
- Maintain objectivity and present facts as found in sources
|
| 798 |
+
- If sources contradict each other, present both perspectives
|
| 799 |
+
- Focus on creating a comprehensive understanding of {research_data['query']}
|
| 800 |
+
|
| 801 |
+
TOPIC-SPECIFIC FOCUS FOR {topic_type.upper()}:
|
| 802 |
+
{get_topic_specific_instructions(topic_type)}
|
| 803 |
+
|
| 804 |
+
Remember: This report should be thorough, well-researched, and provide real value to someone wanting to understand {research_data['query']} comprehensively.
|
| 805 |
"""
|
| 806 |
|
| 807 |
response = model.generate_content(prompt)
|
| 808 |
return response.text
|
| 809 |
except Exception as e:
|
| 810 |
error_msg = str(e).lower()
|
| 811 |
+
print(f"Report generation error: {e}") # Debug info
|
| 812 |
+
|
| 813 |
if "api key not valid" in error_msg or "api_key_invalid" in error_msg:
|
| 814 |
+
return """❌ Invalid API key during report generation.
|
| 815 |
+
|
| 816 |
+
**Common issues:**
|
| 817 |
+
• Your API key may have expired or been revoked
|
| 818 |
+
• Check if you copied the complete key
|
| 819 |
+
• Try regenerating your API key at https://aistudio.google.com/"""
|
| 820 |
+
|
| 821 |
+
elif "quota" in error_msg or "limit" in error_msg:
|
| 822 |
+
return """❌ API quota exceeded during report generation.
|
| 823 |
+
|
| 824 |
+
**Solutions:**
|
| 825 |
+
• Check your usage at https://aistudio.google.com/
|
| 826 |
+
• Wait for the quota to reset (usually monthly)
|
| 827 |
+
• Consider upgrading your plan if needed"""
|
| 828 |
+
|
| 829 |
+
elif "permission" in error_msg or "forbidden" in error_msg:
|
| 830 |
+
return """❌ API key doesn't have required permissions for report generation.
|
| 831 |
+
|
| 832 |
+
**Solutions:**
|
| 833 |
+
• Regenerate your API key at https://aistudio.google.com/
|
| 834 |
+
• Make sure the API key is enabled for Gemini API"""
|
| 835 |
+
|
| 836 |
+
elif "network" in error_msg or "connection" in error_msg or "timeout" in error_msg:
|
| 837 |
+
return """❌ Network error during report generation.
|
| 838 |
+
|
| 839 |
+
**Troubleshooting:**
|
| 840 |
+
• Check your internet connection
|
| 841 |
+
• Try again in a few minutes
|
| 842 |
+
• The report generation process may take some time"""
|
| 843 |
+
|
| 844 |
+
elif "model" in error_msg:
|
| 845 |
+
return """❌ Model not available for report generation.
|
| 846 |
+
|
| 847 |
+
**Solutions:**
|
| 848 |
+
• Try using a different model
|
| 849 |
+
• Check Gemini API availability at https://status.cloud.google.com/"""
|
| 850 |
+
|
| 851 |
else:
|
| 852 |
+
return f"""❌ Error generating report: {str(e)}
|
| 853 |
+
|
| 854 |
+
**Debugging tips:**
|
| 855 |
+
• Try with a shorter research topic
|
| 856 |
+
• Check your internet connection
|
| 857 |
+
• Make sure your API key has sufficient quota"""
|
| 858 |
+
|
| 859 |
+
def get_topic_specific_instructions(topic_type: str) -> str:
|
| 860 |
+
"""Get specific instructions based on topic category"""
|
| 861 |
+
instructions = {
|
| 862 |
+
'politics': """
|
| 863 |
+
- Focus on political implications, policy details, and governmental aspects
|
| 864 |
+
- Include information about key political figures, parties, and institutions
|
| 865 |
+
- Analyze policy impacts and political consequences
|
| 866 |
+
- Present multiple political perspectives objectively
|
| 867 |
+
- Include information about voting patterns, polls, or public opinion if available
|
| 868 |
+
""",
|
| 869 |
+
'history': """
|
| 870 |
+
- Provide chronological context and timeline of events
|
| 871 |
+
- Include historical significance and long-term impacts
|
| 872 |
+
- Mention key historical figures, dates, and places
|
| 873 |
+
- Analyze causes and effects of historical events
|
| 874 |
+
- Connect historical events to modern implications
|
| 875 |
+
""",
|
| 876 |
+
'geography': """
|
| 877 |
+
- Include specific geographical data, coordinates, and locations
|
| 878 |
+
- Provide demographic, climate, and physical geography information
|
| 879 |
+
- Discuss economic geography and natural resources
|
| 880 |
+
- Include maps, borders, and territorial information
|
| 881 |
+
- Analyze geographical impacts on society and economy
|
| 882 |
+
""",
|
| 883 |
+
'current_affairs': """
|
| 884 |
+
- Focus on the most recent developments and breaking news
|
| 885 |
+
- Include timeline of recent events
|
| 886 |
+
- Analyze immediate impacts and short-term consequences
|
| 887 |
+
- Provide context for why this is currently significant
|
| 888 |
+
- Include quotes from recent statements or press releases
|
| 889 |
+
""",
|
| 890 |
+
'technology': """
|
| 891 |
+
- Focus on technical specifications, capabilities, and limitations
|
| 892 |
+
- Include information about development timeline and key innovators
|
| 893 |
+
- Analyze technological implications and future potential
|
| 894 |
+
- Discuss adoption rates, market impact, and competitive landscape
|
| 895 |
+
- Include technical details and how the technology works
|
| 896 |
+
""",
|
| 897 |
+
'war': """
|
| 898 |
+
- Provide strategic analysis and military context
|
| 899 |
+
- Include information about forces, tactics, and equipment involved
|
| 900 |
+
- Analyze geopolitical implications and international responses
|
| 901 |
+
- Discuss humanitarian impacts and civilian consequences
|
| 902 |
+
- Present timeline of conflict development
|
| 903 |
+
""",
|
| 904 |
+
'economics': """
|
| 905 |
+
- Include specific economic data, statistics, and indicators
|
| 906 |
+
- Analyze market trends, financial impacts, and economic consequences
|
| 907 |
+
- Discuss effects on different sectors and stakeholders
|
| 908 |
+
- Include information about economic policies and their outcomes
|
| 909 |
+
- Provide context about economic significance and implications
|
| 910 |
+
""",
|
| 911 |
+
'science': """
|
| 912 |
+
- Focus on scientific methodology, research findings, and evidence
|
| 913 |
+
- Include information about research institutions and scientists involved
|
| 914 |
+
- Explain scientific concepts and their implications
|
| 915 |
+
- Discuss peer review status and scientific consensus
|
| 916 |
+
- Analyze potential applications and future research directions
|
| 917 |
+
"""
|
| 918 |
+
}
|
| 919 |
+
return instructions.get(topic_type, "Focus on providing comprehensive, factual information with proper context and analysis.")
|
| 920 |
|
| 921 |
# Main research function
|
| 922 |
+
def run_research(topic: str, gemini_api_key: str, download_format: str = "markdown"):
|
| 923 |
"""Run the complete research process"""
|
| 924 |
if not gemini_api_key.strip():
|
| 925 |
+
return "❌ Please enter your Gemini API key.", None, None, gr.update(visible=False), gr.update(visible=False)
|
| 926 |
|
| 927 |
if not topic.strip():
|
| 928 |
+
return "❌ Please enter a research topic.", None, None, gr.update(visible=False), gr.update(visible=False)
|
| 929 |
|
| 930 |
# First validate the API key
|
| 931 |
is_valid, validation_message = validate_api_key(gemini_api_key)
|
| 932 |
if not is_valid:
|
| 933 |
+
return f"❌ {validation_message}", None, None, gr.update(visible=False), gr.update(visible=False)
|
| 934 |
|
| 935 |
try:
|
| 936 |
# Perform research
|
| 937 |
+
print(f"Starting research for: {topic}")
|
| 938 |
research_data = perform_research(topic)
|
| 939 |
|
| 940 |
if not research_data['sources']:
|
| 941 |
+
return "❌ No relevant sources found. Please try a different search term.", None, None, gr.update(visible=False), gr.update(visible=False)
|
| 942 |
+
|
| 943 |
+
print(f"Found {len(research_data['sources'])} sources, generating report...")
|
| 944 |
|
| 945 |
# Generate report
|
| 946 |
report = generate_research_report(research_data, gemini_api_key)
|
| 947 |
|
| 948 |
+
# Check if report generation was successful
|
| 949 |
+
if report.startswith("❌"):
|
| 950 |
+
return report, None, None, gr.update(visible=False), gr.update(visible=False)
|
| 951 |
|
| 952 |
+
# Create safe downloadable filenames from the TOPIC, not the report content
|
| 953 |
+
base_filename = sanitize_filename(topic)
|
| 954 |
+
if not base_filename.endswith('.md'):
|
| 955 |
+
base_filename = base_filename.replace('.md', '') + '_report.md'
|
| 956 |
+
|
| 957 |
+
pdf_path = None
|
| 958 |
+
try:
|
| 959 |
+
# Generate PDF using the original topic for filename
|
| 960 |
+
pdf_path = create_pdf_report(report, topic, research_data['sources'], base_filename)
|
| 961 |
+
print(f"PDF generated successfully: {pdf_path}")
|
| 962 |
+
except Exception as pdf_error:
|
| 963 |
+
print(f"PDF generation failed: {pdf_error}")
|
| 964 |
+
# Continue without PDF if it fails
|
| 965 |
+
|
| 966 |
+
print(f"Research completed successfully. MD: {base_filename}")
|
| 967 |
+
|
| 968 |
+
return report, base_filename, pdf_path, gr.update(visible=True), gr.update(visible=True)
|
| 969 |
|
| 970 |
except Exception as e:
|
| 971 |
+
print(f"Research error: {e}") # Debug info
|
| 972 |
+
error_msg = f"❌ An error occurred during research: {str(e)}"
|
| 973 |
+
return error_msg, None, None, gr.update(visible=False), gr.update(visible=False)
|
| 974 |
|
| 975 |
+
# Gradio interface with dark theme
def create_interface():
    """Build and return the Gradio Blocks UI for the research assistant.

    Layout: hero header, feature overview, Gemini API-key entry + validation,
    research-topic input, report output area, and download buttons for the
    Markdown and PDF versions of the generated report.

    Returns:
        gr.Blocks: the assembled (not yet launched) Gradio app.
    """
    # Dark theme CSS, injected verbatim into the page.
    dark_css = """
    /* Dark theme base */
    .gradio-container {
        background: linear-gradient(135deg, #1a1a2e 0%, #16213e 50%, #0f3460 100%) !important;
        min-height: 100vh;
        color: white !important;
        font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
    }

    /* All blocks and containers */
    .block, .gr-box, .gr-form, .gr-panel {
        background: rgba(255, 255, 255, 0.05) !important;
        border: 1px solid rgba(255, 255, 255, 0.1) !important;
        border-radius: 15px !important;
        backdrop-filter: blur(10px) !important;
        padding: 1.5rem !important;
        margin: 0.5rem !important;
    }

    /* Text colors - ALL WHITE */
    body, p, span, div, label, h1, h2, h3, h4, h5, h6 {
        color: white !important;
    }

    .gr-markdown, .gr-markdown * {
        color: white !important;
        background: transparent !important;
    }

    .gr-markdown h1, .gr-markdown h2, .gr-markdown h3 {
        color: #64b5f6 !important;
        border-bottom: 1px solid rgba(255, 255, 255, 0.2) !important;
    }

    /* Input fields */
    .gr-textbox, .gr-textbox input, .gr-textbox textarea {
        background: rgba(255, 255, 255, 0.1) !important;
        border: 1px solid rgba(255, 255, 255, 0.3) !important;
        border-radius: 10px !important;
        color: white !important;
        padding: 12px !important;
    }

    .gr-textbox input::placeholder, .gr-textbox textarea::placeholder {
        color: rgba(255, 255, 255, 0.6) !important;
    }

    .gr-textbox input:focus, .gr-textbox textarea:focus {
        border-color: #64b5f6 !important;
        box-shadow: 0 0 10px rgba(100, 181, 246, 0.3) !important;
        background: rgba(255, 255, 255, 0.15) !important;
    }

    /* Buttons */
    .gr-button {
        border-radius: 25px !important;
        padding: 12px 24px !important;
        font-weight: 600 !important;
        text-transform: uppercase !important;
        letter-spacing: 0.5px !important;
        transition: all 0.3s ease !important;
        border: none !important;
        color: white !important;
    }

    .gr-button-primary {
        background: linear-gradient(135deg, #64b5f6, #42a5f5) !important;
        box-shadow: 0 4px 15px rgba(100, 181, 246, 0.4) !important;
    }

    .gr-button-primary:hover {
        background: linear-gradient(135deg, #42a5f5, #2196f3) !important;
        transform: translateY(-2px) !important;
        box-shadow: 0 6px 20px rgba(100, 181, 246, 0.6) !important;
    }

    .gr-button-secondary {
        background: linear-gradient(135deg, #546e7a, #37474f) !important;
        box-shadow: 0 4px 15px rgba(84, 110, 122, 0.4) !important;
    }

    .gr-button-secondary:hover {
        background: linear-gradient(135deg, #37474f, #263238) !important;
        transform: translateY(-2px) !important;
    }

    /* Accordion */
    .gr-accordion {
        background: rgba(255, 255, 255, 0.05) !important;
        border: 1px solid rgba(255, 255, 255, 0.1) !important;
        border-radius: 12px !important;
    }

    .gr-accordion summary {
        color: white !important;
        background: rgba(255, 255, 255, 0.1) !important;
        padding: 1rem !important;
        border-radius: 10px !important;
    }

    /* Feature cards */
    .feature-card {
        background: rgba(100, 181, 246, 0.1) !important;
        border: 1px solid rgba(100, 181, 246, 0.3) !important;
        border-radius: 12px !important;
        padding: 1.5rem !important;
        margin: 1rem 0 !important;
        border-left: 4px solid #64b5f6 !important;
        backdrop-filter: blur(10px) !important;
    }

    .feature-card h3, .feature-card h4 {
        color: #64b5f6 !important;
        margin-bottom: 1rem !important;
    }

    .feature-card ul li {
        color: rgba(255, 255, 255, 0.9) !important;
        margin-bottom: 0.5rem !important;
    }

    /* Status indicators */
    .status-success {
        background: rgba(76, 175, 80, 0.2) !important;
        border: 1px solid #4caf50 !important;
        border-left: 4px solid #4caf50 !important;
        color: #a5d6a7 !important;
    }

    .status-error {
        background: rgba(244, 67, 54, 0.2) !important;
        border: 1px solid #f44336 !important;
        border-left: 4px solid #f44336 !important;
        color: #ef9a9a !important;
    }

    /* Hero section */
    .hero-section {
        background: linear-gradient(135deg, #1565c0, #1976d2, #1e88e5) !important;
        border-radius: 15px !important;
        padding: 2rem !important;
        margin-bottom: 2rem !important;
        color: white !important;
        box-shadow: 0 10px 30px rgba(0, 0, 0, 0.3) !important;
        text-align: center !important;
    }

    /* Download section */
    .download-section {
        background: rgba(100, 181, 246, 0.1) !important;
        border: 1px solid rgba(100, 181, 246, 0.3) !important;
        border-radius: 12px !important;
        padding: 1.5rem !important;
        text-align: center !important;
        color: white !important;
    }

    /* Markdown content area */
    .gr-markdown {
        background: rgba(255, 255, 255, 0.05) !important;
        border: 1px solid rgba(255, 255, 255, 0.1) !important;
        border-radius: 10px !important;
        padding: 1.5rem !important;
        max-height: 500px !important;
        overflow-y: auto !important;
    }

    /* Responsive design */
    @media (max-width: 768px) {
        .gradio-container {
            padding: 0.5rem !important;
        }

        .block {
            margin: 0.25rem !important;
            padding: 1rem !important;
        }

        .hero-section {
            padding: 1rem !important;
        }

        .feature-card {
            padding: 1rem !important;
            margin: 0.5rem 0 !important;
        }
    }

    /* Scrollbar styling */
    ::-webkit-scrollbar {
        width: 8px;
    }

    ::-webkit-scrollbar-track {
        background: rgba(255, 255, 255, 0.1);
        border-radius: 4px;
    }

    ::-webkit-scrollbar-thumb {
        background: rgba(100, 181, 246, 0.6);
        border-radius: 4px;
    }

    ::-webkit-scrollbar-thumb:hover {
        background: rgba(100, 181, 246, 0.8);
    }
    """

    with gr.Blocks(
        title=f"{APP_NAME} | Advanced AI Research Assistant",
        theme=gr.themes.Base(
            primary_hue="blue",
            secondary_hue="gray",
            neutral_hue="slate",
            text_size="md",
            radius_size="lg",
            spacing_size="lg"
        ).set(
            body_background_fill="linear-gradient(135deg, #1a1a2e 0%, #16213e 50%, #0f3460 100%)",
            block_background_fill="rgba(255, 255, 255, 0.05)",
            block_border_color="rgba(255, 255, 255, 0.1)",
            block_radius="15px",
            button_primary_background_fill="linear-gradient(135deg, #64b5f6, #42a5f5)",
            button_primary_text_color="white",
            input_background_fill="rgba(255, 255, 255, 0.1)",
            input_border_color="rgba(255, 255, 255, 0.3)",
            body_text_color="white",
            block_label_text_color="white"
        ),
        css=dark_css
    ) as demo:

        # Hero Section
        with gr.Row():
            with gr.Column():
                gr.HTML(f"""
                <div class="hero-section">
                    <h1 style="font-size: 3rem; font-weight: bold; margin: 0; text-shadow: 2px 2px 4px rgba(0,0,0,0.3);">
                        🔬 {APP_NAME}
                    </h1>
                    <h2 style="font-size: 1.5rem; margin: 0.5rem 0; opacity: 0.9;">
                        {APP_DESCRIPTION}
                    </h2>
                    <p style="font-size: 1.1rem; margin: 1rem 0; opacity: 0.8;">
                        Powered by Google Gemini AI & Advanced Web Research
                    </p>
                </div>
                """)

        # Features Overview
        with gr.Row():
            with gr.Column():
                gr.HTML("""
                <div class="feature-card">
                    <h3>🎯 What this tool does:</h3>
                    <ul style="margin: 1rem 0;">
                        <li><strong>🔍 Intelligent Search:</strong> Uses DuckDuckGo to find the most relevant sources</li>
                        <li><strong>📊 Content Analysis:</strong> Extracts and processes content from multiple websites</li>
                        <li><strong>🤖 AI Synthesis:</strong> Uses Google Gemini to create comprehensive reports</li>
                        <li><strong>📄 Professional Output:</strong> Generates both Markdown and PDF reports</li>
                        <li><strong>⚡ Fast & Reliable:</strong> Automated research in minutes, not hours</li>
                    </ul>
                </div>
                """)

        # Simple API Key Section
        # BUGFIX: the heading previously contained a mojibake replacement
        # character (\ufffd) where the key emoji should be.
        with gr.Row():
            with gr.Column():
                gr.HTML("""
                <div class="feature-card">
                    <h3>🔑 API Key Setup</h3>
                    <p>Get your free Gemini API key from <a href="https://aistudio.google.com/" target="_blank" style="color: #64b5f6;">Google AI Studio</a></p>
                </div>
                """)

        with gr.Row():
            with gr.Column(scale=3):
                gemini_key = gr.Textbox(
                    label="🔐 Enter your Gemini API Key",
                    type="password",
                    placeholder="Paste your API key here...",
                    container=True
                )
            with gr.Column(scale=1):
                validate_btn = gr.Button(
                    "🔍 Validate",
                    variant="secondary",
                    size="lg"
                )

        # Hidden until the user clicks "Validate"; filled with a status banner.
        validation_output = gr.HTML(visible=False)

        # Main Research Interface
        # BUGFIX: heading color was #2c3e50 — dark-on-dark and invisible
        # against this theme's dark gradient background.
        gr.HTML("<h2 style='text-align: center; color: #64b5f6; margin: 2rem 0;'>🔬 Start Your Research</h2>")

        with gr.Row():
            with gr.Column(scale=2):
                research_topic = gr.Textbox(
                    label="🎯 Research Topic",
                    placeholder="Enter your research topic here... (e.g., 'Latest developments in quantum computing', 'Climate change solutions 2024', 'AI trends in healthcare')",
                    lines=3,
                    container=True
                )

                with gr.Row():
                    research_btn = gr.Button(
                        "🚀 Start Deep Research",
                        variant="primary",
                        size="lg",
                        scale=2
                    )
                    with gr.Column(scale=1):
                        gr.HTML("<div style='padding: 1rem;'></div>")

            with gr.Column(scale=1):
                gr.HTML("""
                <div class="feature-card">
                    <h4>💡 Research Tips:</h4>
                    <ul style="font-size: 0.9rem;">
                        <li><strong>Be Specific:</strong> "AI in healthcare 2024" vs "AI"</li>
                        <li><strong>Include Context:</strong> Add year, location, or specific aspect</li>
                        <li><strong>Ask Questions:</strong> "What is the impact of...?"</li>
                        <li><strong>Current Events:</strong> Include "latest" or "current"</li>
                        <li><strong>Multiple Angles:</strong> "Causes and solutions of..."</li>
                    </ul>
                    <div style="margin-top: 1rem; padding: 0.8rem; background: rgba(76, 175, 80, 0.1); border-radius: 6px; border-left: 3px solid #4caf50;">
                        <strong>📊 Research Power:</strong><br>
                        <small>10+ sources • Topic categorization • Authoritative domains • AI synthesis</small>
                    </div>
                </div>
                """)

        # Progress and Results Section
        with gr.Row():
            with gr.Column():
                progress_html = gr.HTML(visible=False)

                output = gr.Markdown(
                    value="Your comprehensive research report will appear here...",
                    label="📊 Research Report",
                    container=True,
                    height=400
                )

        # Download Section
        with gr.Row():
            with gr.Column():
                download_section = gr.HTML(visible=False)

        with gr.Row():
            with gr.Column():
                download_md_btn = gr.DownloadButton(
                    "📝 Download Markdown",
                    visible=False,
                    variant="secondary",
                    size="lg"
                )
            with gr.Column():
                download_pdf_btn = gr.DownloadButton(
                    "📄 Download PDF Report",
                    visible=False,
                    variant="primary",
                    size="lg"
                )

        # Footer
        gr.HTML(f"""
        <div style="text-align: center; padding: 2rem; color: #7f8c8d; border-top: 1px solid #ecf0f1; margin-top: 3rem;">
            <p>🔬 <strong>{APP_NAME} {APP_VERSION}</strong> | Advanced AI Research Assistant</p>
            <p>Powered by Google Gemini AI • Built with ❤️ for researchers worldwide</p>
        </div>
        """)

        # Event Handlers
        def validate_key_handler(api_key):
            """Validate the supplied Gemini key and render a status banner."""
            if not api_key:
                return gr.update(
                    visible=True,
                    value='<div class="status-error"><h4>❌ API Key Required</h4><p>Please enter your Gemini API key above.</p></div>'
                )

            is_valid, message = validate_api_key(api_key)
            if is_valid:
                return gr.update(
                    visible=True,
                    value=f'<div class="status-success"><h4>✅ API Key Valid!</h4><p>{message}</p><p>You\'re ready to start researching!</p></div>'
                )
            else:
                return gr.update(
                    visible=True,
                    value=f'<div class="status-error"><h4>❌ API Key Issue</h4><div style="white-space: pre-line;">{message}</div></div>'
                )

        def research_handler(topic, api_key):
            """Validate inputs, run the research pipeline, and map its results
            onto (report markdown, md-button update, pdf-button update).

            BUGFIX: this handler used to be dead code (the button was wired
            straight to run_research), returned 6 values while run_research
            returns 5, and the click wiring listed both download buttons TWICE
            in `outputs`. Each button now receives a single gr.update carrying
            both its file value and its visibility.
            """
            if not api_key or not api_key.strip():
                return (
                    "❌ Please enter and validate your Gemini API key first.",
                    gr.update(visible=False),
                    gr.update(visible=False),
                )

            if not topic or not topic.strip():
                return (
                    "❌ Please enter a research topic.",
                    gr.update(visible=False),
                    gr.update(visible=False),
                )

            # run_research returns:
            # (report, md_filename, pdf_path, gr.update, gr.update)
            report, md_name, pdf_path, _md_vis, _pdf_vis = run_research(topic, api_key)

            # BUGFIX: a DownloadButton serves a FILE; run_research only yields a
            # filename string, so write the report to a real temp file here.
            md_path = None
            if md_name:
                import tempfile
                md_path = os.path.join(tempfile.gettempdir(), md_name)
                try:
                    with open(md_path, "w", encoding="utf-8") as fh:
                        fh.write(report)
                except OSError:
                    md_path = None  # keep the download hidden if the write fails

            pdf_ok = bool(pdf_path) and os.path.exists(pdf_path)
            return (
                report,
                gr.update(value=md_path, visible=md_path is not None),
                gr.update(value=pdf_path if pdf_ok else None, visible=pdf_ok),
            )

        # Wire up events
        validate_btn.click(
            fn=validate_key_handler,
            inputs=[gemini_key],
            outputs=[validation_output]
        )

        research_btn.click(
            fn=research_handler,
            inputs=[research_topic, gemini_key],
            # BUGFIX: each output component appears exactly once (the old list
            # repeated both download buttons, which Gradio rejects).
            outputs=[output, download_md_btn, download_pdf_btn]
        )

        # NOTE: the previous download .click() handlers were removed on
        # purpose — a DownloadButton with a file value serves that file on
        # click by itself. The old handlers fed raw markdown text (not a
        # path) back into the md button and used the pdf button as its own
        # input, both of which broke the download.

    return demo
|