hari7261 commited on
Commit
a5555a4
·
verified ·
1 Parent(s): a792bab

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1286 -144
app.py CHANGED
@@ -8,125 +8,694 @@ from urllib.parse import urlparse
8
  import re
9
  import json
10
  from typing import List, Dict, Any
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
  # Validate Gemini API key
13
  def validate_api_key(api_key: str) -> tuple[bool, str]:
14
  """Validate if the Gemini API key is working"""
15
  if not api_key or not api_key.strip():
16
- return False, "API key is empty. Please enter a valid Gemini API key."
17
-
18
- if not api_key.startswith('AI'):
19
- return False, "Invalid API key format. Gemini API keys should start with 'AI'."
20
-
 
 
 
 
 
 
21
  try:
22
  # Test the API key with a simple request
23
- genai.configure(api_key=api_key.strip())
24
  model = genai.GenerativeModel('gemini-2.0-flash')
25
-
26
- # Try a minimal test generation
27
- response = model.generate_content("Hello")
28
- return True, "API key is valid."
29
-
30
  except Exception as e:
31
  error_msg = str(e).lower()
 
 
32
  if "api key not valid" in error_msg or "api_key_invalid" in error_msg:
33
- return False, "Invalid API key. Please check your Gemini API key and try again."
34
- elif "quota" in error_msg:
35
- return False, "API quota exceeded. Please check your Gemini API usage limits."
36
- elif "permission" in error_msg:
37
- return False, "API key doesn't have required permissions. Please check your API key settings."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  else:
39
- return False, f"API key validation failed: {str(e)}"
 
 
 
 
 
40
 
41
- # Search the web for relevant information using DuckDuckGo
42
- def web_search(query: str, max_results: int = 10) -> List[Dict[str, str]]:
43
- """Search the web for relevant information using DuckDuckGo"""
44
  try:
45
  with DDGS() as ddgs:
46
- # Add timeout and retry logic
47
- results = []
48
- for result in ddgs.text(query, max_results=max_results):
49
- results.append(result)
50
- if len(results) >= max_results:
51
- break
52
- return results
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  except Exception as e:
54
  print(f"Search error: {e}")
55
- # Try with a simpler approach if the first fails
56
  try:
57
  with DDGS() as ddgs:
58
  results = list(ddgs.text(query, max_results=min(max_results, 5)))
 
59
  return results
60
  except Exception as e2:
61
- print(f"Retry search error: {e2}")
62
  return []
63
 
64
# Fetch and extract content from a URL
def fetch_url_content(url: str) -> str:
    """Fetch content from a URL and extract meaningful text"""
    try:
        request_headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        page = requests.get(url, headers=request_headers, timeout=10)
        page.raise_for_status()

        parsed = BeautifulSoup(page.content, 'html.parser')

        # Drop boilerplate tags that carry no article text.
        for node in parsed(['script', 'style', 'nav', 'footer', 'header', 'aside']):
            node.decompose()

        raw_text = parsed.get_text()

        # Normalize whitespace: break into stripped fragments, drop empties,
        # and re-join with single spaces.
        fragments = (
            piece.strip()
            for raw_line in raw_text.splitlines()
            for piece in raw_line.strip().split(" ")
        )
        cleaned = ' '.join(piece for piece in fragments if piece)

        # Cap length to avoid blowing the LLM token budget downstream.
        return cleaned[:5000]
    except Exception as e:
        print(f"Error fetching {url}: {e}")
        return ""
92
 
93
# Research function using web search and content extraction
def perform_research(query: str, max_sources: int = 5) -> Dict[str, Any]:
    """Perform research by searching and extracting content from multiple sources.

    Args:
        query: The research topic to search for.
        max_sources: Target number of successfully-fetched sources.

    Returns:
        Dict with keys 'sources' (list of {'title', 'url', 'content'}),
        'research_context' (concatenated text blocks for the LLM prompt)
        and 'query' (echo of the input).
    """
    print(f"Researching: {query}")

    # Over-fetch so that pages that fail to download can be replaced.
    search_results = web_search(query, max_results=max_sources * 2)

    sources = []
    content_chunks = []

    # BUG FIX: the old loop only tried search_results[:max_sources], so the
    # extra results fetched above were never used when a page failed.
    # Walk the full result list until enough sources have succeeded.
    for result in search_results:
        if len(sources) >= max_sources:
            break

        # .get() guards against results missing an 'href' key.
        url = result.get('href', '')
        if not url:
            continue

        print(f"Fetching content from {url}")
        content = fetch_url_content(url)

        if content and len(content) > 200:  # Only include meaningful content
            sources.append({
                'title': result.get('title', 'No title'),
                'url': url,
                'content': content
            })
            # Number chunks by accepted-source count so SOURCE n matches
            # the n-th entry in 'sources'.
            content_chunks.append(
                f"SOURCE {len(sources)}:\nURL: {url}\nCONTENT:\n{content}\n"
            )

        # Be polite with delays between requests
        time.sleep(1)

    research_context = "\n".join(content_chunks)

    return {
        'sources': sources,
        'research_context': research_context,
        'query': query
    }
126
 
127
- # Generate a research report using Gemini
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
  def generate_research_report(research_data: Dict[str, Any], gemini_api_key: str) -> str:
129
- """Generate a comprehensive research report using Gemini"""
130
  if not gemini_api_key:
131
  return "❌ Gemini API key is required to generate the report."
132
 
@@ -139,157 +708,730 @@ def generate_research_report(research_data: Dict[str, Any], gemini_api_key: str)
139
  # Initialize Gemini (already configured in validation)
140
  model = genai.GenerativeModel('gemini-2.0-flash')
141
 
 
 
 
 
142
  prompt = f"""
143
- RESEARCH TOPIC: {research_data['query']}
 
 
 
144
 
145
- RESEARCH CONTEXT FROM VARIOUS SOURCES:
146
  {research_data['research_context']}
147
 
148
- Please analyze this research and create a comprehensive, well-structured report with:
149
- 1. Key findings and insights
150
- 2. Detailed explanations of complex concepts
151
- 3. Relevant examples and case studies
152
- 4. Real-world applications
153
- 5. Future predictions and trends
154
- 6. Citations for all sources with links
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
155
 
156
- Format your response using Markdown with appropriate headings, subheadings, bullet points, and bold text for emphasis.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
157
  """
158
 
159
  response = model.generate_content(prompt)
160
  return response.text
161
  except Exception as e:
162
  error_msg = str(e).lower()
 
 
163
  if "api key not valid" in error_msg or "api_key_invalid" in error_msg:
164
- return "❌ Invalid API key. Please check your Gemini API key and try again."
165
- elif "quota" in error_msg:
166
- return "❌ API quota exceeded. Please check your Gemini API usage limits."
167
- elif "permission" in error_msg:
168
- return "❌ API key doesn't have required permissions. Please check your API key settings."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
169
  else:
170
- return f"❌ Error generating report: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
171
 
172
# Main research function
def run_research(topic: str, gemini_api_key: str):
    """Run the complete research process.

    Returns a 3-tuple consumed by the Gradio UI:
    (report_markdown, downloadable_file_path_or_None, download-button update).
    """
    if not gemini_api_key.strip():
        return "❌ Please enter your Gemini API key.", None, gr.update(visible=False)

    if not topic.strip():
        return "❌ Please enter a research topic.", None, gr.update(visible=False)

    # First validate the API key (fail fast before the slow research step).
    is_valid, validation_message = validate_api_key(gemini_api_key)
    if not is_valid:
        return f"❌ {validation_message}", None, gr.update(visible=False)

    try:
        # Perform research
        research_data = perform_research(topic)

        if not research_data['sources']:
            return "❌ No relevant sources found. Please try a different search term.", None, gr.update(visible=False)

        # Generate report
        report = generate_research_report(research_data, gemini_api_key)

        # BUG FIX: previously only a filename *string* was returned, but the
        # report was never written to disk, so gr.DownloadButton had no real
        # file to serve. Write the markdown to a temp file and return its path.
        import os
        import tempfile

        safe_stem = re.sub(r'[^\w\-]+', '_', topic).strip('_') or "research"
        file_path = os.path.join(tempfile.gettempdir(), f"{safe_stem}_report.md")
        with open(file_path, 'w', encoding='utf-8') as fh:
            fh.write(report)

        return report, file_path, gr.update(visible=True)

    except Exception as e:
        error_msg = f"❌ An error occurred: {str(e)}"
        return error_msg, None, gr.update(visible=False)
 
204
 
205
- # Gradio interface
206
  def create_interface():
207
- with gr.Blocks(title="Gemini Deep Research Agent", theme=gr.themes.Soft()) as demo:
208
- gr.Markdown("# 📘 Gemini Deep Research Agent")
209
- gr.Markdown("This agent performs deep research on any topic using Google's Gemini and DuckDuckGo search")
210
-
211
- # Add API key help section
212
- with gr.Accordion("🔑 How to get your Gemini API Key", open=False):
213
- gr.Markdown("""
214
- 1. Visit [Google AI Studio](https://aistudio.google.com/)
215
- 2. Sign in with your Google account
216
- 3. Click "Get API Key"
217
- 4. Create a new API key
218
- 5. Copy and paste it below
219
-
220
- **Note:** Your API key should start with "AI" and be kept secure.
221
- """)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
222
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
223
  with gr.Row():
224
- with gr.Column(scale=1):
225
- gr.Markdown("## API Configuration")
226
- gemini_key = gr.Textbox(
227
- label="Gemini API Key",
228
- type="password",
229
- placeholder="Enter your Gemini API key (starts with 'AI')",
230
- info="Get your free API key from https://aistudio.google.com/"
231
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
232
 
233
- # Add API key validation button
234
- validate_btn = gr.Button("🔍 Validate API Key", size="sm")
235
- validation_output = gr.Textbox(
236
- label="Validation Status",
237
- interactive=False,
238
- visible=False
239
- )
 
 
 
 
 
 
 
240
 
 
 
 
 
 
 
241
  with gr.Column(scale=2):
242
  research_topic = gr.Textbox(
243
- label="Research Topic",
244
- placeholder="e.g., Latest developments in AI, Climate change solutions, Cryptocurrency trends",
245
- lines=2
 
246
  )
247
 
248
- research_btn = gr.Button("🚀 Start Research", variant="primary")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
249
 
250
  output = gr.Markdown(
251
- label="Research Report",
252
- value="Your research report will appear here..."
 
 
253
  )
 
 
 
 
 
254
 
255
- download_btn = gr.DownloadButton(
256
- "📥 Download Report",
257
- visible=False
258
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
259
 
260
- # API key validation function
261
- def validate_key(api_key):
262
  if not api_key:
263
- return gr.update(visible=True, value="❌ Please enter an API key"), gr.update()
 
 
 
264
 
265
  is_valid, message = validate_api_key(api_key)
266
  if is_valid:
267
- return gr.update(visible=True, value=f"✅ {message}"), gr.update()
 
 
 
268
  else:
269
- return gr.update(visible=True, value=f"❌ {message}"), gr.update()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
270
 
271
- # Set up the validation button
272
  validate_btn.click(
273
- fn=validate_key,
274
  inputs=[gemini_key],
275
- outputs=[validation_output, validation_output]
276
  )
277
 
278
- # Set up the research button
279
  research_btn.click(
280
  fn=run_research,
281
  inputs=[research_topic, gemini_key],
282
- outputs=[output, download_btn, download_btn]
283
  )
284
 
285
- # Set up download functionality
286
- def create_file(content):
287
- return content
 
 
288
 
289
- download_btn.click(
290
- fn=create_file,
 
 
 
 
 
291
  inputs=[output],
292
- outputs=[download_btn]
 
 
 
 
 
 
293
  )
294
 
295
  return demo
 
8
  import re
9
  import json
10
  from typing import List, Dict, Any
11
+ from datetime import datetime
12
+ import os
13
+ import tempfile
14
+ from reportlab.lib.pagesizes import letter, A4
15
+ from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak, Table, TableStyle
16
+ from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
17
+ from reportlab.lib.units import inch
18
+ from reportlab.lib import colors
19
+ from reportlab.lib.enums import TA_CENTER, TA_LEFT, TA_RIGHT
20
+ import markdown
21
+
22
+ # Application Constants
23
+ APP_NAME = "DeepResearchAgent-AI"
24
+ APP_VERSION = "v2.0"
25
+ APP_DESCRIPTION = "Advanced AI-Powered Research Assistant"
26
+
27
# Enhanced topic detection and search helper functions
def detect_topic_category(query: str) -> str:
    """Detect the category of research topic for specialized search strategies.

    Matching is case-insensitive and substring-based, first-match-wins.
    The table below defines the priority order — e.g. "world war" matches
    'history' before the 'war' category is ever checked.

    Args:
        query: Free-text research topic.

    Returns:
        One of: 'politics', 'history', 'geography', 'current_affairs',
        'technology', 'war', 'economics', 'science', or 'general' when
        no keyword matches.
    """
    # Ordered (category, keywords) table — replaces the old repetitive
    # if/elif chain; order is significant and preserved from the original.
    category_keywords = [
        ('politics', ['politics', 'political', 'government', 'policy', 'election', 'democracy', 'parliament', 'congress', 'senate', 'president', 'minister', 'geopolitics', 'diplomacy', 'foreign policy', 'international relations']),
        ('history', ['history', 'historical', 'ancient', 'medieval', 'world war', 'civilization', 'empire', 'dynasty', 'revolution', 'century', 'era', 'timeline', 'past', 'heritage']),
        ('geography', ['geography', 'geographical', 'country', 'continent', 'ocean', 'mountain', 'river', 'climate', 'population', 'capital', 'border', 'region', 'territory', 'map']),
        ('current_affairs', ['current', 'news', 'today', 'recent', 'latest', 'breaking', 'update', 'happening', '2024', '2025', 'this year', 'now']),
        ('technology', ['technology', 'tech', 'ai', 'artificial intelligence', 'machine learning', 'software', 'hardware', 'computer', 'digital', 'programming', 'coding', 'algorithm', 'data science', 'cybersecurity']),
        ('war', ['war', 'warfare', 'conflict', 'battle', 'military', 'army', 'defense', 'weapon', 'strategy', 'combat', 'invasion', 'occupation', 'siege']),
        ('economics', ['economy', 'economic', 'finance', 'financial', 'market', 'trade', 'business', 'industry', 'company', 'corporation', 'gdp', 'inflation', 'recession']),
        ('science', ['science', 'scientific', 'research', 'study', 'experiment', 'discovery', 'innovation', 'physics', 'chemistry', 'biology', 'medicine', 'health']),
    ]

    query_lower = query.lower()

    for category, keywords in category_keywords:
        if any(keyword in query_lower for keyword in keywords):
            return category
    return 'general'
59
+
60
def get_specialized_domains(topic_type: str) -> List[str]:
    """Return curated source domains to target for a given topic category.

    Unknown or unrecognized categories fall back to the 'general' list.
    """
    # Lookup table keyed by topic category.
    domains_by_topic = {
        'politics': ['reuters.com', 'bbc.com', 'cnn.com', 'politico.com', 'foreignaffairs.com', 'cfr.org', 'brookings.edu', 'csis.org'],
        'history': ['britannica.com', 'history.com', 'nationalgeographic.com', 'smithsonianmag.com', 'historynet.com', 'worldhistory.org'],
        'geography': ['nationalgeographic.com', 'worldatlas.com', 'britannica.com', 'cia.gov', 'worldbank.org', 'un.org'],
        'current_affairs': ['reuters.com', 'bbc.com', 'cnn.com', 'ap.org', 'npr.org', 'aljazeera.com', 'theguardian.com', 'nytimes.com'],
        'technology': ['techcrunch.com', 'wired.com', 'ars-technica.com', 'ieee.org', 'nature.com', 'sciencemag.org', 'mit.edu', 'stanford.edu'],
        'war': ['janes.com', 'defensenews.com', 'militarytimes.com', 'csis.org', 'rand.org', 'stratfor.com'],
        'economics': ['reuters.com', 'bloomberg.com', 'economist.com', 'ft.com', 'worldbank.org', 'imf.org', 'federalreserve.gov'],
        'science': ['nature.com', 'sciencemag.org', 'scientificamerican.com', 'newscientist.com', 'pnas.org', 'cell.com'],
        'general': ['wikipedia.org', 'britannica.com', 'reuters.com', 'bbc.com', 'cnn.com']
    }
    if topic_type in domains_by_topic:
        return domains_by_topic[topic_type]
    return domains_by_topic['general']
74
+
75
def get_topic_keywords(query: str, topic_type: str) -> List[str]:
    """Return search-enhancement keywords for a topic category.

    Note: *query* is currently unused and kept only for interface
    compatibility with callers. Unknown categories fall back to the
    'general' keyword list.
    """
    # Per-category keyword table used to widen search queries.
    keywords_by_topic = {
        'politics': ['analysis', 'policy', 'government', 'official', 'statement', 'report', 'briefing', 'summit', 'debate', 'legislation'],
        'history': ['timeline', 'chronology', 'facts', 'documented', 'archive', 'primary source', 'historian', 'evidence', 'analysis', 'context'],
        'geography': ['facts', 'statistics', 'data', 'demographic', 'topography', 'atlas', 'survey', 'official', 'census', 'coordinates'],
        'current_affairs': ['breaking', 'latest', 'update', 'developing', 'live', 'recent', 'today', 'headlines', 'news', 'report'],
        'technology': ['innovation', 'breakthrough', 'development', 'advancement', 'research', 'cutting-edge', 'emerging', 'trend', 'future', 'application'],
        'war': ['analysis', 'strategy', 'tactics', 'intelligence', 'assessment', 'report', 'conflict', 'situation', 'update', 'briefing'],
        'economics': ['analysis', 'forecast', 'data', 'statistics', 'trend', 'market', 'report', 'outlook', 'indicator', 'growth'],
        'science': ['research', 'study', 'discovery', 'breakthrough', 'publication', 'peer-reviewed', 'journal', 'findings', 'methodology', 'evidence'],
        'general': ['information', 'facts', 'comprehensive', 'detailed', 'overview', 'guide', 'explanation', 'analysis', 'summary', 'background']
    }
    fallback = keywords_by_topic['general']
    return keywords_by_topic.get(topic_type, fallback)
89
+
90
def get_priority_domains_for_topic(topic_type: str) -> List[str]:
    """Return the domains used to rank search results for a topic category.

    Results hosted on these domains are surfaced first; unknown categories
    fall back to the 'general' list.
    """
    priority_by_topic = {
        'politics': ['reuters.com', 'bbc.com', 'cnn.com', 'politico.com', 'foreignaffairs.com', 'cfr.org', 'brookings.edu', 'apnews.com'],
        'history': ['britannica.com', 'history.com', 'nationalgeographic.com', 'smithsonianmag.com', 'worldhistory.org', 'historynet.com'],
        'geography': ['nationalgeographic.com', 'worldatlas.com', 'britannica.com', 'cia.gov', 'worldbank.org', 'un.org'],
        'current_affairs': ['reuters.com', 'bbc.com', 'cnn.com', 'ap.org', 'npr.org', 'aljazeera.com', 'theguardian.com', 'nytimes.com'],
        'technology': ['techcrunch.com', 'wired.com', 'ars-technica.com', 'ieee.org', 'nature.com', 'mit.edu', 'stanford.edu', 'acm.org'],
        'war': ['janes.com', 'defensenews.com', 'csis.org', 'rand.org', 'stratfor.com', 'cfr.org'],
        'economics': ['reuters.com', 'bloomberg.com', 'economist.com', 'ft.com', 'worldbank.org', 'imf.org', 'federalreserve.gov'],
        'science': ['nature.com', 'sciencemag.org', 'scientificamerican.com', 'newscientist.com', 'pnas.org', 'cell.com'],
        'general': ['wikipedia.org', 'britannica.com', 'reuters.com', 'bbc.com', 'cnn.com', 'nationalgeographic.com']
    }
    # EAFP lookup with explicit fallback for unrecognized categories.
    try:
        return priority_by_topic[topic_type]
    except KeyError:
        return priority_by_topic['general']
104
+
105
# Sanitize filename for safe file creation
def sanitize_filename(filename: str) -> str:
    """Sanitize filename to remove invalid characters for Windows/Unix systems.

    Args:
        filename: Proposed filename (may be arbitrary user text).

    Returns:
        A safe filename ending in '.md'; falls back to
        'research_report.md' when nothing usable remains.
    """
    # Replace characters Windows forbids in filenames.
    invalid_chars = '<>:"/\\|?*'
    for char in invalid_chars:
        filename = filename.replace(char, '_')

    # ROBUSTNESS: drop control characters (0x00-0x1F), also illegal on Windows.
    filename = ''.join(c for c in filename if ord(c) >= 32)

    # Remove multiple consecutive underscores and trim
    filename = re.sub(r'_+', '_', filename)
    filename = filename.strip('_')

    # ROBUSTNESS: Windows also rejects trailing dots/spaces.
    filename = filename.strip(' .')

    # Limit length to prevent filesystem issues; re-trim so truncation
    # doesn't leave a dangling separator.
    if len(filename) > 200:
        filename = filename[:200].rstrip(' ._')

    # Ensure it's not empty and add extension if missing
    if not filename:
        filename = "research_report"

    if not filename.endswith('.md'):
        filename += '.md'

    return filename
129
+
130
# PDF Generation Function
def create_pdf_report(content: str, topic: str, sources: List[Dict], filename: str) -> str:
    """Create a professional PDF report from markdown content.

    Args:
        content: Report body in (loose) Markdown; headings, bold lines,
            bullets and simple numbered lists are rendered specially.
        topic: Research topic shown on the cover page.
        sources: Source dicts; 'title' and 'url' keys are read if present.
        filename: Markdown filename; its '.md' suffix is swapped for '.pdf'.

    Returns:
        Path to the generated PDF inside the system temp directory, or
        None if PDF generation fails (despite the ``-> str`` annotation).
    """
    try:
        # Create temporary PDF file (system temp dir; '.md' -> '.pdf')
        temp_dir = tempfile.gettempdir()
        pdf_path = os.path.join(temp_dir, filename.replace('.md', '.pdf'))

        # Create PDF document (A4 with 1-inch vertical margins)
        doc = SimpleDocTemplate(pdf_path, pagesize=A4, topMargin=1*inch, bottomMargin=1*inch)
        styles = getSampleStyleSheet()
        story = []  # platypus "flowables" appended in page order

        # Custom styles
        title_style = ParagraphStyle(
            'CustomTitle',
            parent=styles['Heading1'],
            fontSize=24,
            textColor=colors.HexColor('#2C3E50'),
            spaceAfter=30,
            alignment=TA_CENTER,
            fontName='Helvetica-Bold'
        )

        subtitle_style = ParagraphStyle(
            'CustomSubtitle',
            parent=styles['Heading2'],
            fontSize=14,
            textColor=colors.HexColor('#34495E'),
            spaceAfter=20,
            alignment=TA_CENTER
        )

        header_style = ParagraphStyle(
            'CustomHeader',
            parent=styles['Heading2'],
            fontSize=16,
            textColor=colors.HexColor('#2980B9'),
            spaceAfter=12,
            spaceBefore=20,
            fontName='Helvetica-Bold'
        )

        body_style = ParagraphStyle(
            'CustomBody',
            parent=styles['Normal'],
            fontSize=11,
            textColor=colors.HexColor('#2C3E50'),
            spaceAfter=6,
            alignment=TA_LEFT,
            leading=14
        )

        # Header Section (app branding on the cover page)
        story.append(Paragraph(APP_NAME, title_style))
        story.append(Paragraph(APP_DESCRIPTION, subtitle_style))
        story.append(Spacer(1, 0.2*inch))

        # Add decorative line (a borderless table with only a bottom rule)
        line_data = [['', '']]
        line_table = Table(line_data, colWidths=[5*inch])
        line_table.setStyle(TableStyle([
            ('LINEBELOW', (0,0), (-1,-1), 2, colors.HexColor('#3498DB')),
        ]))
        story.append(line_table)
        story.append(Spacer(1, 0.3*inch))

        # Research Topic
        story.append(Paragraph("Research Topic", header_style))
        story.append(Paragraph(topic, body_style))
        story.append(Spacer(1, 0.2*inch))

        # Generation Info (local, naive timestamp)
        current_time = datetime.now().strftime("%B %d, %Y at %I:%M %p")
        story.append(Paragraph("Generated", header_style))
        story.append(Paragraph(f"{current_time}", body_style))
        story.append(Spacer(1, 0.2*inch))

        # Sources Summary
        if sources:
            story.append(Paragraph("Sources Analyzed", header_style))
            story.append(Paragraph(f"{len(sources)} reliable sources processed", body_style))
            story.append(Spacer(1, 0.3*inch))

        story.append(PageBreak())

        # Main Content
        story.append(Paragraph("Research Report", header_style))
        story.append(Spacer(1, 0.1*inch))

        # Process markdown content line by line (lightweight, not a full
        # markdown parser: only #/##/### headings, **bold** lines, bullets,
        # 1.-5. numbered items and inline **/* emphasis are handled).
        lines = content.split('\n')
        for line in lines:
            line = line.strip()
            if not line:
                story.append(Spacer(1, 6))
                continue

            if line.startswith('# '):
                story.append(Paragraph(line[2:], header_style))
            elif line.startswith('## '):
                story.append(Paragraph(line[3:], header_style))
            elif line.startswith('### '):
                # NOTE(review): styles below are re-created per matching line
                # inside the loop — works, but could be hoisted above the loop.
                header_3_style = ParagraphStyle(
                    'Header3',
                    parent=header_style,
                    fontSize=14,
                    textColor=colors.HexColor('#7F8C8D')
                )
                story.append(Paragraph(line[4:], header_3_style))
            elif line.startswith('**') and line.endswith('**'):
                bold_style = ParagraphStyle(
                    'Bold',
                    parent=body_style,
                    fontName='Helvetica-Bold'
                )
                story.append(Paragraph(line[2:-2], bold_style))
            elif line.startswith('- ') or line.startswith('* '):
                bullet_style = ParagraphStyle(
                    'Bullet',
                    parent=body_style,
                    leftIndent=20,
                    bulletIndent=10,
                    bulletText='•',
                    bulletColor=colors.HexColor('#3498DB')
                )
                story.append(Paragraph(line[2:], bullet_style))
            elif line.startswith(('1. ', '2. ', '3. ', '4. ', '5. ')):
                # Numbered items 1-5 pass through verbatim; 6+ fall to 'else'.
                story.append(Paragraph(line, body_style))
            else:
                # Clean basic markdown formatting (**bold** / *italic* ->
                # reportlab's inline <b>/<i> markup)
                line = re.sub(r'\*\*(.*?)\*\*', r'<b>\1</b>', line)
                line = re.sub(r'\*(.*?)\*', r'<i>\1</i>', line)
                story.append(Paragraph(line, body_style))

        # Footer section: bibliography page
        story.append(PageBreak())
        story.append(Paragraph("Sources", header_style))

        if sources:
            for i, source in enumerate(sources[:10], 1):  # Limit to 10 sources
                source_style = ParagraphStyle(
                    'Source',
                    parent=body_style,
                    fontSize=10,
                    leftIndent=10,
                    spaceAfter=8
                )
                # Truncate long titles so a single entry stays on one line.
                title = source.get('title', 'No Title')[:100]
                url = source.get('url', '')
                story.append(Paragraph(f"{i}. {title}", source_style))
                if url:
                    url_style = ParagraphStyle(
                        'URL',
                        parent=source_style,
                        fontSize=9,
                        textColor=colors.HexColor('#3498DB'),
                        leftIndent=20
                    )
                    story.append(Paragraph(url, url_style))

        # Footer
        story.append(Spacer(1, 0.5*inch))
        footer_style = ParagraphStyle(
            'Footer',
            parent=styles['Normal'],
            fontSize=10,
            textColor=colors.HexColor('#7F8C8D'),
            alignment=TA_CENTER
        )
        story.append(Paragraph(f"Generated by {APP_NAME} {APP_VERSION} | Advanced AI Research Assistant", footer_style))

        # Build PDF (writes pdf_path to disk)
        doc.build(story)
        return pdf_path

    except Exception as e:
        # Best-effort: callers must handle a None return.
        print(f"PDF generation error: {e}")
        return None
309
 
310
# Validate Gemini API key
def validate_api_key(api_key: str) -> tuple[bool, str]:
    """Validate if the Gemini API key is working.

    Performs cheap local format checks first, then a one-shot live call
    to the Gemini API. Returns (is_valid, user-facing message); failure
    messages are multi-line Markdown with troubleshooting tips.
    """
    if not api_key or not api_key.strip():
        return False, "API key is empty. Please enter a valid Gemini API key."

    api_key = api_key.strip()

    # Basic format checks (done locally, before spending an API call)
    if len(api_key) < 20:
        return False, "❌ API key seems too short. Please check that you copied the complete key."

    # Keys are expected to be alphanumeric plus '-'/'_' only.
    if not api_key.replace('-', '').replace('_', '').isalnum():
        return False, "❌ API key contains invalid characters. Please check your key format."

    try:
        # Test the API key with a simple request
        genai.configure(api_key=api_key)
        model = genai.GenerativeModel('gemini-2.0-flash')

        # Try a minimal test generation with timeout
        # (max_output_tokens kept tiny to minimize quota usage)
        response = model.generate_content("Test", generation_config={"max_output_tokens": 10})
        return True, "API key is valid and working!"

    except Exception as e:
        # Classify the failure by substring-matching the lowercased error
        # text, mapping each case to an actionable user-facing message.
        error_msg = str(e).lower()
        print(f"API Key validation error: {e}")  # Debug info

        if "api key not valid" in error_msg or "api_key_invalid" in error_msg:
            return False, """❌ Invalid API key. Please check your Gemini API key and try again.

**Common issues:**
Make sure you copied the ENTIRE key from https://aistudio.google.com/
Check for extra spaces at the beginning or end
• Try refreshing the page and copying the key again
• Make sure you're using the correct API key (not mixing up with other services)"""

        elif "quota" in error_msg or "limit" in error_msg:
            return False, """❌ API quota exceeded. Your Gemini API usage limit has been reached.

**Solutions:**
• Check your usage at https://aistudio.google.com/
• Wait for the quota to reset (usually monthly)
• Consider upgrading your plan if needed"""

        elif "permission" in error_msg or "forbidden" in error_msg:
            return False, """❌ API key doesn't have required permissions.

**Solutions:**
• Regenerate your API key at https://aistudio.google.com/
• Make sure the API key is enabled for Gemini API
• Check if your Google Cloud project has the necessary permissions"""

        elif "network" in error_msg or "connection" in error_msg or "timeout" in error_msg:
            return False, """❌ Network error. Please check your internet connection and try again.

**Troubleshooting:**
• Check your internet connection
• Try again in a few minutes
• Disable VPN if you're using one
• Check if Google services are accessible in your region"""

        elif "model" in error_msg:
            return False, """❌ Model not available. The specified Gemini model might not be available.

**Solutions:**
• Try using a different model (like 'gemini-pro')
• Check Gemini API availability at https://status.cloud.google.com/"""

        else:
            # Fallback: surface the raw error plus generic guidance.
            return False, f"""❌ API key validation failed: {str(e)}

**Debugging tips:**
• Make sure you're using a valid Gemini API key from https://aistudio.google.com/
• Try creating a new API key if the current one doesn't work
• Check the Google Cloud Console for any billing or permission issues"""
386
 
387
# Search the web for relevant information using DuckDuckGo with enhanced targeting for diverse topics
def web_search(query: str, max_results: int = 15) -> List[Dict[str, str]]:
    """Run a multi-strategy DuckDuckGo search and return up to ``max_results`` unique hits.

    Strategies, in order: exact-phrase search, topic-specific domain searches,
    topic-keyword expansion, time-qualified searches (only for time-sensitive
    categories), academic/authoritative modifiers, and a general fallback when
    fewer than 8 results were gathered. Results are de-duplicated by URL, with
    hits from priority domains listed first.

    Each result dict comes straight from ``DDGS.text`` (keys such as 'title',
    'href', 'body'). Returns [] if every search attempt fails.

    NOTE(review): `detect_topic_category`, `get_specialized_domains`,
    `get_topic_keywords` and `get_priority_domains_for_topic` are defined
    elsewhere in this module.
    """
    try:
        with DDGS() as ddgs:
            all_results = []

            # Detect topic category so later strategies can specialize.
            topic_type = detect_topic_category(query.lower())
            print(f"Detected topic category: {topic_type}")

            # Strategy 1: Exact phrase search (quoted query).
            # NOTE(review): max_results//3 is 0 when max_results < 3 — presumably
            # callers always pass larger values; confirm if that changes.
            try:
                exact_results = list(ddgs.text(f'"{query}"', max_results=max_results//3))
                all_results.extend(exact_results)
                print(f"Found {len(exact_results)} results from exact search")
            except Exception as e:
                print(f"Exact search error: {e}")

            # Strategy 2: Topic-specific domain searches (site: filter per domain).
            specialized_domains = get_specialized_domains(topic_type)
            for domain in specialized_domains:
                try:
                    domain_results = list(ddgs.text(f'{query} site:{domain}', max_results=2))
                    all_results.extend(domain_results)
                    if len(all_results) >= max_results:
                        break
                except Exception as e:
                    print(f"Domain search error for {domain}: {e}")
                    continue

            # Strategy 3: Enhanced keyword searches based on topic (first 5 keywords).
            enhanced_keywords = get_topic_keywords(query, topic_type)
            for keyword in enhanced_keywords[:5]:
                try:
                    keyword_results = list(ddgs.text(f'{query} {keyword}', max_results=2))
                    all_results.extend(keyword_results)
                    if len(all_results) >= max_results:
                        break
                except Exception as e:
                    print(f"Keyword search error for {keyword}: {e}")
                    continue

            # Strategy 4: Time-based searches — only for categories where recency matters.
            if topic_type in ['current_affairs', 'politics', 'technology', 'news']:
                time_modifiers = ['2024', '2025', 'latest', 'recent', 'current', 'today', 'this year']
                for modifier in time_modifiers[:3]:
                    try:
                        time_results = list(ddgs.text(f'{query} {modifier}', max_results=2))
                        all_results.extend(time_results)
                        if len(all_results) >= max_results:
                            break
                    except Exception as e:
                        print(f"Time-based search error for {modifier}: {e}")
                        continue

            # Strategy 5: Academic and authoritative modifiers (first 3 of the list).
            academic_modifiers = ['analysis', 'research', 'study', 'report', 'comprehensive', 'detailed']
            for modifier in academic_modifiers[:3]:
                try:
                    academic_results = list(ddgs.text(f'{query} {modifier}', max_results=2))
                    all_results.extend(academic_results)
                    if len(all_results) >= max_results:
                        break
                except Exception as e:
                    print(f"Academic search error for {modifier}: {e}")
                    continue

            # Strategy 6: Fallback comprehensive search when earlier passes found little.
            if len(all_results) < 8:
                try:
                    general_results = list(ddgs.text(query, max_results=max_results//2))
                    all_results.extend(general_results)
                except Exception as e:
                    print(f"General search error: {e}")

            # De-duplicate by URL while prioritizing authoritative domains:
            # priority-domain hits are taken first, then remaining unique hits.
            seen_urls = set()
            unique_results = []
            priority_domains = get_priority_domains_for_topic(topic_type)

            # First pass: results whose URL contains any priority domain.
            for result in all_results:
                url = result.get('href', '')
                if url not in seen_urls and any(domain in url for domain in priority_domains):
                    seen_urls.add(url)
                    unique_results.append(result)
                    if len(unique_results) >= max_results:
                        break

            # Second pass: any other not-yet-seen result, in original order.
            for result in all_results:
                url = result.get('href', '')
                if url not in seen_urls:
                    seen_urls.add(url)
                    unique_results.append(result)
                    if len(unique_results) >= max_results:
                        break

            print(f"Total unique results found: {len(unique_results)}")
            return unique_results[:max_results]

    except Exception as e:
        print(f"Search error: {e}")
        # Final fallback - simple search on a fresh DDGS session (the original
        # session may be the thing that failed).
        try:
            with DDGS() as ddgs:
                results = list(ddgs.text(query, max_results=min(max_results, 5)))
                print(f"Fallback search found: {len(results)} results")
                return results
        except Exception as e2:
            print(f"Fallback search error: {e2}")
            return []
500
 
501
# Fetch and extract content from a URL with better error handling
def fetch_url_content(url: str) -> str:
    """Fetch a page and return up to 8000 characters of cleaned visible text.

    Uses a desktop-browser User-Agent, strips script/style/navigation chrome,
    prefers the <main>/<article>/content <div> region when present, and
    collapses whitespace. On a timeout, one retry with a shorter timeout is
    attempted (returning up to 5000 chars of raw page text). Returns "" on any
    failure — callers treat an empty string as "skip this source".
    """
    try:
        # Browser-like headers; some sites reject default python-requests UAs.
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.5',
            'Accept-Encoding': 'gzip, deflate',
            'Connection': 'keep-alive',
            'Upgrade-Insecure-Requests': '1',
        }

        # Generous timeout with redirects allowed; a timeout falls through to
        # the retry branch below.
        response = requests.get(url, headers=headers, timeout=15, allow_redirects=True)
        response.raise_for_status()

        soup = BeautifulSoup(response.content, 'html.parser')

        # Remove unwanted elements that never carry article text.
        for element in soup(['script', 'style', 'nav', 'footer', 'header', 'aside', 'iframe', 'noscript']):
            element.decompose()

        # Prefer the main content area if one is identifiable.
        main_content = soup.find('main') or soup.find('article') or soup.find('div', class_=['content', 'main', 'body'])
        if main_content:
            text = main_content.get_text()
        else:
            text = soup.get_text()

        # Clean up: strip each line, then split lines into tokens and keep only
        # tokens longer than 2 chars.
        # NOTE(review): split(" ") tokenizes on single spaces, so the len > 2
        # filter also drops short words like "AI"/"US" — confirm this is intended
        # (the classic recipe splits on a double space instead).
        lines = (line.strip() for line in text.splitlines())
        chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
        text = ' '.join(chunk for chunk in chunks if chunk and len(chunk) > 2)

        # Collapse any remaining whitespace runs.
        text = re.sub(r'\s+', ' ', text)
        text = text.strip()

        # Cap at 8000 chars so downstream prompts stay bounded.
        return text[:8000] if text else ""

    except requests.exceptions.Timeout:
        print(f"Timeout error for {url} - trying with shorter timeout")
        try:
            # Retry with a shorter timeout. `headers` is safe to reference here:
            # it is always assigned before the request that can time out.
            response = requests.get(url, headers=headers, timeout=8, allow_redirects=True)
            response.raise_for_status()
            soup = BeautifulSoup(response.content, 'html.parser')
            text = soup.get_text()
            text = re.sub(r'\s+', ' ', text.strip())
            return text[:5000] if text else ""
        except Exception as retry_error:
            print(f"Retry failed for {url}: {retry_error}")
            return ""

    except requests.exceptions.RequestException as e:
        # HTTP errors, connection failures, invalid URLs, etc.
        print(f"Request error fetching {url}: {e}")
        return ""
    except Exception as e:
        # Parsing errors or anything else unexpected — fail soft.
        print(f"Unexpected error fetching {url}: {e}")
        return ""
563
 
564
# Research function using web search and content extraction with enhanced analysis for diverse topics
def perform_research(query: str, max_sources: int = 12) -> Dict[str, Any]:
    """Perform comprehensive research by searching and extracting content from multiple sources.

    Pipeline: categorize the topic, over-fetch search results (4x max_sources),
    then fetch each candidate page — skipping low-quality/duplicate URLs and
    pages whose content fails the relevance check. If fewer than 8 quality
    sources were collected, a broader second search pass (with relaxed checks)
    tops up to ``max_sources``.

    Returns a dict with keys: 'sources' (list of {'title','url','content',
    'topic_type'}), 'research_context' (all source text concatenated for the
    LLM prompt), 'query', 'total_sources', 'topic_type', 'failed_sources'.
    """
    print(f"🔍 Starting comprehensive research for: {query}")

    # Detect topic category for better research strategy.
    topic_type = detect_topic_category(query.lower())
    print(f"📊 Detected topic category: {topic_type}")

    # Over-fetch search results so enough survive the quality filters below.
    search_results = web_search(query, max_results=max_sources*4)
    print(f"📊 Found {len(search_results)} potential sources")

    sources = []            # accepted sources (dicts)
    content_chunks = []     # formatted text blocks fed to the report prompt
    successful_fetches = 0  # count of accepted sources
    failed_fetches = 0      # count of rejected/failed candidates

    for i, result in enumerate(search_results):
        if successful_fetches >= max_sources:
            break

        url = result.get('href', '')
        title = result.get('title', 'No title')

        # Skip low-quality or duplicate sources before spending a network fetch.
        if should_skip_source(url, title, sources):
            print(f"⏭️ Skipping {url} - low quality or duplicate")
            continue

        print(f"🌐 Fetching content from {url}")
        content = fetch_url_content(url)

        if content and len(content) > 150:  # minimum content threshold
            # Validate content quality for the specific topic.
            if is_relevant_content(content, query, topic_type):
                sources.append({
                    'title': title,
                    'url': url,
                    'content': content,
                    'topic_type': topic_type
                })
                content_chunks.append(f"SOURCE {successful_fetches + 1} [{topic_type.upper()}]:\nTITLE: {title}\nURL: {url}\nCONTENT:\n{content}\n{'='*100}\n")
                successful_fetches += 1
                print(f"✅ Successfully extracted {len(content)} characters from source {successful_fetches}")
            else:
                print(f"⚠️ Content not relevant for {query}")
                failed_fetches += 1
        else:
            print(f"⚠️ Skipped {url} - insufficient content ({len(content) if content else 0} chars)")
            failed_fetches += 1

        # Small delay between fetches to be respectful to target sites.
        time.sleep(0.3)

    # Top-up pass: broaden the query if too few quality sources were found.
    # Note the relaxed acceptance here: lower length threshold (100) and no
    # relevance check; these extras are tagged 'additional'.
    if successful_fetches < 8:
        print(f"🔄 Only found {successful_fetches} quality sources, trying broader search...")
        broader_results = web_search(f"{query} comprehensive analysis", max_results=15)

        for result in broader_results:
            if successful_fetches >= max_sources:
                break

            url = result.get('href', '')
            title = result.get('title', 'No title')

            if should_skip_source(url, title, sources):
                continue

            content = fetch_url_content(url)
            if content and len(content) > 100:
                sources.append({
                    'title': title,
                    'url': url,
                    'content': content,
                    'topic_type': 'additional'
                })
                content_chunks.append(f"ADDITIONAL SOURCE {successful_fetches + 1}:\nTITLE: {title}\nURL: {url}\nCONTENT:\n{content}\n{'='*100}\n")
                successful_fetches += 1
                print(f"✅ Additional source {successful_fetches} added")

            time.sleep(0.3)

    research_context = "\n".join(content_chunks)

    print(f"📝 Research completed: {successful_fetches} sources processed, {failed_fetches} failed")
    print(f"📊 Total content length: {len(research_context)} characters")

    return {
        'sources': sources,
        'research_context': research_context,
        'query': query,
        'total_sources': successful_fetches,
        'topic_type': topic_type,
        'failed_sources': failed_fetches
    }
661
 
662
def should_skip_source(url: str, title: str, existing_sources: List[Dict]) -> bool:
    """Decide whether a search hit should be discarded before fetching.

    A source is skipped when its URL was already collected, when it lives on a
    known low-quality (social-media) domain, or when its title is too short or
    a generic placeholder. Returns True to skip, False to keep.
    """
    # Already collected this exact URL?
    if any(entry['url'] == url for entry in existing_sources):
        return True

    # Social-media style domains rarely yield citable research content.
    blocked_domains = ('pinterest.com', 'instagram.com', 'facebook.com',
                       'twitter.com', 'tiktok.com', 'reddit.com')
    for blocked in blocked_domains:
        if blocked in url:
            return True

    # Reject placeholder or uninformative titles.
    placeholder_titles = ('no title', 'untitled', 'page not found')
    if len(title) < 10:
        return True
    if title.lower() in placeholder_titles:
        return True

    return False
679
+
680
def is_relevant_content(content: str, query: str, topic_type: str) -> bool:
    """Check whether fetched page text is relevant enough to keep as a source.

    Content passes when it is longer than 200 characters AND either at least
    30% of the query's words appear in it, or at least two topic-specific
    keywords (from get_topic_keywords) are present.
    """
    haystack = content.lower()
    terms = query.lower().split()

    # Fraction of query words that occur somewhere in the content.
    coverage = (sum(term in haystack for term in terms) / len(terms)) if terms else 0

    # Count topic-specific keyword hits.
    keyword_hits = sum(
        keyword.lower() in haystack
        for keyword in get_topic_keywords(query, topic_type)
    )

    # Require a reasonable length plus either signal of relevance.
    return len(content) > 200 and (coverage >= 0.3 or keyword_hits >= 2)
695
+
696
# Generate a research report using Gemini with enhanced topic handling
def generate_research_report(research_data: Dict[str, Any], gemini_api_key: str) -> str:
    """Generate a comprehensive research report using Gemini for diverse topics.

    Args:
        research_data: Dict produced by perform_research(); must contain
            'query', 'research_context' and 'sources'. 'topic_type',
            'total_sources' and 'failed_sources' are optional.
        gemini_api_key: Google AI Studio API key.

    Returns:
        The Markdown report text on success, or a user-facing string starting
        with "❌" describing the failure (callers dispatch on that prefix).
    """
    if not gemini_api_key:
        return "❌ Gemini API key is required to generate the report."

    try:
        # Initialize Gemini (genai.configure() is expected to have been called
        # during key validation before this function is invoked — TODO confirm
        # callers always validate first).
        model = genai.GenerativeModel('gemini-2.0-flash')

        topic_type = research_data.get('topic_type', 'general')
        failed_sources = research_data.get('failed_sources', 0)

        # Create topic-specific prompt
        prompt = f"""
RESEARCH QUERY: {research_data['query']}
TOPIC CATEGORY: {topic_type.upper()}
TOTAL SOURCES ANALYZED: {research_data.get('total_sources', len(research_data['sources']))}
FAILED SOURCES: {failed_sources}

COMPREHENSIVE RESEARCH DATA FROM MULTIPLE AUTHORITATIVE SOURCES:
{research_data['research_context']}

INSTRUCTIONS FOR {topic_type.upper()} RESEARCH REPORT:
Based on the above research data, create a comprehensive, well-structured report analyzing ALL the information provided. This is a {topic_type} research topic, so focus on relevant aspects for this domain.

Your report structure should include:

1. **EXECUTIVE SUMMARY**
   - Key findings and main points about {research_data['query']}
   - Critical insights and takeaways
   - Brief overview of what the research reveals

2. **DETAILED ANALYSIS**
   - In-depth examination of all collected information
   - Multiple perspectives and viewpoints found in sources
   - Connections between different pieces of information
   - Contradictions or debates if any exist

3. **BACKGROUND & CONTEXT**
   - Historical background (if relevant)
   - Current situation and status
   - Relevant context that helps understand the topic

4. **KEY FINDINGS & INSIGHTS**
   - Most important discoveries from the research
   - Patterns and trends identified
   - Significant facts and statistics
   - Expert opinions and analysis

5. **CURRENT STATUS & DEVELOPMENTS**
   - Latest information and recent developments
   - Current state of affairs
   - Recent changes or updates

6. **DIFFERENT PERSPECTIVES**
   - Various viewpoints found in sources
   - Debates and discussions around the topic
   - Conflicting information (if any)

7. **IMPLICATIONS & SIGNIFICANCE**
   - Why this topic matters
   - Impact and consequences
   - Future implications

8. **DETAILED BREAKDOWN**
   - Specific details from each major source
   - Technical information (if applicable)
   - Statistics and data points
   - Quotes and specific information

9. **CONCLUSIONS**
   - Summary of what was discovered
   - Final thoughts and analysis
   - Gaps in information (if any)

10. **SOURCES & REFERENCES**
    - List all sources with proper attribution
    - Include URLs for verification
    - Note the reliability and type of each source

FORMATTING REQUIREMENTS:
- Use clear Markdown formatting with headers (##), subheaders (###), and bullet points
- Make the content engaging, informative, and well-organized
- Include specific details, examples, and quotes from the sources
- Highlight important information with **bold text**
- Use bullet points for lists and key points
- Organize information logically and coherently
- If information is conflicting, present both sides
- If insufficient information is available for any section, clearly state what could not be determined

CONTENT REQUIREMENTS:
- Base your analysis ONLY on the provided source content
- Do not make assumptions or add information not present in the sources
- Include specific details and examples from multiple sources
- Synthesize information from all sources, don't just summarize each one separately
- Maintain objectivity and present facts as found in sources
- If sources contradict each other, present both perspectives
- Focus on creating a comprehensive understanding of {research_data['query']}

TOPIC-SPECIFIC FOCUS FOR {topic_type.upper()}:
{get_topic_specific_instructions(topic_type)}

Remember: This report should be thorough, well-researched, and provide real value to someone wanting to understand {research_data['query']} comprehensively.
"""

        response = model.generate_content(prompt)

        # FIX: response.text raises (ValueError) when the candidate was blocked
        # by safety filters or came back empty; previously that fell into the
        # generic error branch with a misleading message. Guard explicitly.
        try:
            report_text = response.text
        except (ValueError, AttributeError):
            report_text = ""
        if not report_text:
            return "❌ The model returned no content (the response may have been blocked by safety filters). Try rephrasing the research topic and running the research again."
        return report_text

    except Exception as e:
        error_msg = str(e).lower()
        print(f"Report generation error: {e}")  # Debug info

        # Classify the failure into an actionable user-facing message.
        if "api key not valid" in error_msg or "api_key_invalid" in error_msg:
            return """❌ Invalid API key during report generation.

**Common issues:**
• Your API key may have expired or been revoked
• Check if you copied the complete key
• Try regenerating your API key at https://aistudio.google.com/"""

        elif "quota" in error_msg or "limit" in error_msg:
            return """❌ API quota exceeded during report generation.

**Solutions:**
• Check your usage at https://aistudio.google.com/
• Wait for the quota to reset (usually monthly)
• Consider upgrading your plan if needed"""

        elif "permission" in error_msg or "forbidden" in error_msg:
            return """❌ API key doesn't have required permissions for report generation.

**Solutions:**
• Regenerate your API key at https://aistudio.google.com/
• Make sure the API key is enabled for Gemini API"""

        elif "network" in error_msg or "connection" in error_msg or "timeout" in error_msg:
            return """❌ Network error during report generation.

**Troubleshooting:**
• Check your internet connection
• Try again in a few minutes
• The report generation process may take some time"""

        elif "model" in error_msg:
            # NOTE: substring check is broad — any error mentioning "model"
            # lands here; kept last among the specific branches on purpose.
            return """❌ Model not available for report generation.

**Solutions:**
• Try using a different model
• Check Gemini API availability at https://status.cloud.google.com/"""

        else:
            return f"""❌ Error generating report: {str(e)}

**Debugging tips:**
• Try with a shorter research topic
• Check your internet connection
• Make sure your API key has sufficient quota"""
+
859
def get_topic_specific_instructions(topic_type: str) -> str:
    """Return report-writing guidance tailored to a topic category.

    Known categories: politics, history, geography, current_affairs,
    technology, war, economics, science. Any other category falls back to a
    generic comprehensive-analysis instruction.
    """
    topic_guidance = {
        'politics': """
    - Focus on political implications, policy details, and governmental aspects
    - Include information about key political figures, parties, and institutions
    - Analyze policy impacts and political consequences
    - Present multiple political perspectives objectively
    - Include information about voting patterns, polls, or public opinion if available
    """,
        'history': """
    - Provide chronological context and timeline of events
    - Include historical significance and long-term impacts
    - Mention key historical figures, dates, and places
    - Analyze causes and effects of historical events
    - Connect historical events to modern implications
    """,
        'geography': """
    - Include specific geographical data, coordinates, and locations
    - Provide demographic, climate, and physical geography information
    - Discuss economic geography and natural resources
    - Include maps, borders, and territorial information
    - Analyze geographical impacts on society and economy
    """,
        'current_affairs': """
    - Focus on the most recent developments and breaking news
    - Include timeline of recent events
    - Analyze immediate impacts and short-term consequences
    - Provide context for why this is currently significant
    - Include quotes from recent statements or press releases
    """,
        'technology': """
    - Focus on technical specifications, capabilities, and limitations
    - Include information about development timeline and key innovators
    - Analyze technological implications and future potential
    - Discuss adoption rates, market impact, and competitive landscape
    - Include technical details and how the technology works
    """,
        'war': """
    - Provide strategic analysis and military context
    - Include information about forces, tactics, and equipment involved
    - Analyze geopolitical implications and international responses
    - Discuss humanitarian impacts and civilian consequences
    - Present timeline of conflict development
    """,
        'economics': """
    - Include specific economic data, statistics, and indicators
    - Analyze market trends, financial impacts, and economic consequences
    - Discuss effects on different sectors and stakeholders
    - Include information about economic policies and their outcomes
    - Provide context about economic significance and implications
    """,
        'science': """
    - Focus on scientific methodology, research findings, and evidence
    - Include information about research institutions and scientists involved
    - Explain scientific concepts and their implications
    - Discuss peer review status and scientific consensus
    - Analyze potential applications and future research directions
    """,
    }

    # Generic fallback for any category without dedicated guidance.
    default_guidance = "Focus on providing comprehensive, factual information with proper context and analysis."
    return topic_guidance.get(topic_type, default_guidance)
920
 
921
  # Main research function
922
+ def run_research(topic: str, gemini_api_key: str, download_format: str = "markdown"):
923
  """Run the complete research process"""
924
  if not gemini_api_key.strip():
925
+ return "❌ Please enter your Gemini API key.", None, None, gr.update(visible=False), gr.update(visible=False)
926
 
927
  if not topic.strip():
928
+ return "❌ Please enter a research topic.", None, None, gr.update(visible=False), gr.update(visible=False)
929
 
930
  # First validate the API key
931
  is_valid, validation_message = validate_api_key(gemini_api_key)
932
  if not is_valid:
933
+ return f"❌ {validation_message}", None, None, gr.update(visible=False), gr.update(visible=False)
934
 
935
  try:
936
  # Perform research
937
+ print(f"Starting research for: {topic}")
938
  research_data = perform_research(topic)
939
 
940
  if not research_data['sources']:
941
+ return "❌ No relevant sources found. Please try a different search term.", None, None, gr.update(visible=False), gr.update(visible=False)
942
+
943
+ print(f"Found {len(research_data['sources'])} sources, generating report...")
944
 
945
  # Generate report
946
  report = generate_research_report(research_data, gemini_api_key)
947
 
948
+ # Check if report generation was successful
949
+ if report.startswith("❌"):
950
+ return report, None, None, gr.update(visible=False), gr.update(visible=False)
951
 
952
+ # Create safe downloadable filenames from the TOPIC, not the report content
953
+ base_filename = sanitize_filename(topic)
954
+ if not base_filename.endswith('.md'):
955
+ base_filename = base_filename.replace('.md', '') + '_report.md'
956
+
957
+ pdf_path = None
958
+ try:
959
+ # Generate PDF using the original topic for filename
960
+ pdf_path = create_pdf_report(report, topic, research_data['sources'], base_filename)
961
+ print(f"PDF generated successfully: {pdf_path}")
962
+ except Exception as pdf_error:
963
+ print(f"PDF generation failed: {pdf_error}")
964
+ # Continue without PDF if it fails
965
+
966
+ print(f"Research completed successfully. MD: {base_filename}")
967
+
968
+ return report, base_filename, pdf_path, gr.update(visible=True), gr.update(visible=True)
969
 
970
  except Exception as e:
971
+ print(f"Research error: {e}") # Debug info
972
+ error_msg = f"❌ An error occurred during research: {str(e)}"
973
+ return error_msg, None, None, gr.update(visible=False), gr.update(visible=False)
974
 
975
+ # Gradio interface with dark theme
976
  def create_interface():
977
+ # Dark theme CSS
978
+ dark_css = """
979
+ /* Dark theme base */
980
+ .gradio-container {
981
+ background: linear-gradient(135deg, #1a1a2e 0%, #16213e 50%, #0f3460 100%) !important;
982
+ min-height: 100vh;
983
+ color: white !important;
984
+ font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
985
+ }
986
+
987
+ /* All blocks and containers */
988
+ .block, .gr-box, .gr-form, .gr-panel {
989
+ background: rgba(255, 255, 255, 0.05) !important;
990
+ border: 1px solid rgba(255, 255, 255, 0.1) !important;
991
+ border-radius: 15px !important;
992
+ backdrop-filter: blur(10px) !important;
993
+ padding: 1.5rem !important;
994
+ margin: 0.5rem !important;
995
+ }
996
+
997
+ /* Text colors - ALL WHITE */
998
+ body, p, span, div, label, h1, h2, h3, h4, h5, h6 {
999
+ color: white !important;
1000
+ }
1001
+
1002
+ .gr-markdown, .gr-markdown * {
1003
+ color: white !important;
1004
+ background: transparent !important;
1005
+ }
1006
+
1007
+ .gr-markdown h1, .gr-markdown h2, .gr-markdown h3 {
1008
+ color: #64b5f6 !important;
1009
+ border-bottom: 1px solid rgba(255, 255, 255, 0.2) !important;
1010
+ }
1011
+
1012
+ /* Input fields */
1013
+ .gr-textbox, .gr-textbox input, .gr-textbox textarea {
1014
+ background: rgba(255, 255, 255, 0.1) !important;
1015
+ border: 1px solid rgba(255, 255, 255, 0.3) !important;
1016
+ border-radius: 10px !important;
1017
+ color: white !important;
1018
+ padding: 12px !important;
1019
+ }
1020
+
1021
+ .gr-textbox input::placeholder, .gr-textbox textarea::placeholder {
1022
+ color: rgba(255, 255, 255, 0.6) !important;
1023
+ }
1024
+
1025
+ .gr-textbox input:focus, .gr-textbox textarea:focus {
1026
+ border-color: #64b5f6 !important;
1027
+ box-shadow: 0 0 10px rgba(100, 181, 246, 0.3) !important;
1028
+ background: rgba(255, 255, 255, 0.15) !important;
1029
+ }
1030
+
1031
+ /* Buttons */
1032
+ .gr-button {
1033
+ border-radius: 25px !important;
1034
+ padding: 12px 24px !important;
1035
+ font-weight: 600 !important;
1036
+ text-transform: uppercase !important;
1037
+ letter-spacing: 0.5px !important;
1038
+ transition: all 0.3s ease !important;
1039
+ border: none !important;
1040
+ color: white !important;
1041
+ }
1042
+
1043
+ .gr-button-primary {
1044
+ background: linear-gradient(135deg, #64b5f6, #42a5f5) !important;
1045
+ box-shadow: 0 4px 15px rgba(100, 181, 246, 0.4) !important;
1046
+ }
1047
+
1048
+ .gr-button-primary:hover {
1049
+ background: linear-gradient(135deg, #42a5f5, #2196f3) !important;
1050
+ transform: translateY(-2px) !important;
1051
+ box-shadow: 0 6px 20px rgba(100, 181, 246, 0.6) !important;
1052
+ }
1053
+
1054
+ .gr-button-secondary {
1055
+ background: linear-gradient(135deg, #546e7a, #37474f) !important;
1056
+ box-shadow: 0 4px 15px rgba(84, 110, 122, 0.4) !important;
1057
+ }
1058
+
1059
+ .gr-button-secondary:hover {
1060
+ background: linear-gradient(135deg, #37474f, #263238) !important;
1061
+ transform: translateY(-2px) !important;
1062
+ }
1063
+
1064
+ /* Accordion */
1065
+ .gr-accordion {
1066
+ background: rgba(255, 255, 255, 0.05) !important;
1067
+ border: 1px solid rgba(255, 255, 255, 0.1) !important;
1068
+ border-radius: 12px !important;
1069
+ }
1070
+
1071
+ .gr-accordion summary {
1072
+ color: white !important;
1073
+ background: rgba(255, 255, 255, 0.1) !important;
1074
+ padding: 1rem !important;
1075
+ border-radius: 10px !important;
1076
+ }
1077
+
1078
+ /* Feature cards */
1079
+ .feature-card {
1080
+ background: rgba(100, 181, 246, 0.1) !important;
1081
+ border: 1px solid rgba(100, 181, 246, 0.3) !important;
1082
+ border-radius: 12px !important;
1083
+ padding: 1.5rem !important;
1084
+ margin: 1rem 0 !important;
1085
+ border-left: 4px solid #64b5f6 !important;
1086
+ backdrop-filter: blur(10px) !important;
1087
+ }
1088
+
1089
+ .feature-card h3, .feature-card h4 {
1090
+ color: #64b5f6 !important;
1091
+ margin-bottom: 1rem !important;
1092
+ }
1093
+
1094
+ .feature-card ul li {
1095
+ color: rgba(255, 255, 255, 0.9) !important;
1096
+ margin-bottom: 0.5rem !important;
1097
+ }
1098
+
1099
+ /* Status indicators */
1100
+ .status-success {
1101
+ background: rgba(76, 175, 80, 0.2) !important;
1102
+ border: 1px solid #4caf50 !important;
1103
+ border-left: 4px solid #4caf50 !important;
1104
+ color: #a5d6a7 !important;
1105
+ }
1106
+
1107
+ .status-error {
1108
+ background: rgba(244, 67, 54, 0.2) !important;
1109
+ border: 1px solid #f44336 !important;
1110
+ border-left: 4px solid #f44336 !important;
1111
+ color: #ef9a9a !important;
1112
+ }
1113
+
1114
+ /* Hero section */
1115
+ .hero-section {
1116
+ background: linear-gradient(135deg, #1565c0, #1976d2, #1e88e5) !important;
1117
+ border-radius: 15px !important;
1118
+ padding: 2rem !important;
1119
+ margin-bottom: 2rem !important;
1120
+ color: white !important;
1121
+ box-shadow: 0 10px 30px rgba(0, 0, 0, 0.3) !important;
1122
+ text-align: center !important;
1123
+ }
1124
+
1125
+ /* Download section */
1126
+ .download-section {
1127
+ background: rgba(100, 181, 246, 0.1) !important;
1128
+ border: 1px solid rgba(100, 181, 246, 0.3) !important;
1129
+ border-radius: 12px !important;
1130
+ padding: 1.5rem !important;
1131
+ text-align: center !important;
1132
+ color: white !important;
1133
+ }
1134
+
1135
+ /* Markdown content area */
1136
+ .gr-markdown {
1137
+ background: rgba(255, 255, 255, 0.05) !important;
1138
+ border: 1px solid rgba(255, 255, 255, 0.1) !important;
1139
+ border-radius: 10px !important;
1140
+ padding: 1.5rem !important;
1141
+ max-height: 500px !important;
1142
+ overflow-y: auto !important;
1143
+ }
1144
+
1145
+ /* Responsive design */
1146
+ @media (max-width: 768px) {
1147
+ .gradio-container {
1148
+ padding: 0.5rem !important;
1149
+ }
1150
+
1151
+ .block {
1152
+ margin: 0.25rem !important;
1153
+ padding: 1rem !important;
1154
+ }
1155
 
1156
+ .hero-section {
1157
+ padding: 1rem !important;
1158
+ }
1159
+
1160
+ .feature-card {
1161
+ padding: 1rem !important;
1162
+ margin: 0.5rem 0 !important;
1163
+ }
1164
+ }
1165
+
1166
+ /* Scrollbar styling */
1167
+ ::-webkit-scrollbar {
1168
+ width: 8px;
1169
+ }
1170
+
1171
+ ::-webkit-scrollbar-track {
1172
+ background: rgba(255, 255, 255, 0.1);
1173
+ border-radius: 4px;
1174
+ }
1175
+
1176
+ ::-webkit-scrollbar-thumb {
1177
+ background: rgba(100, 181, 246, 0.6);
1178
+ border-radius: 4px;
1179
+ }
1180
+
1181
+ ::-webkit-scrollbar-thumb:hover {
1182
+ background: rgba(100, 181, 246, 0.8);
1183
+ }
1184
+ """
1185
+
1186
+ with gr.Blocks(
1187
+ title=f"{APP_NAME} | Advanced AI Research Assistant",
1188
+ theme=gr.themes.Base(
1189
+ primary_hue="blue",
1190
+ secondary_hue="gray",
1191
+ neutral_hue="slate",
1192
+ text_size="md",
1193
+ radius_size="lg",
1194
+ spacing_size="lg"
1195
+ ).set(
1196
+ body_background_fill="linear-gradient(135deg, #1a1a2e 0%, #16213e 50%, #0f3460 100%)",
1197
+ block_background_fill="rgba(255, 255, 255, 0.05)",
1198
+ block_border_color="rgba(255, 255, 255, 0.1)",
1199
+ block_radius="15px",
1200
+ button_primary_background_fill="linear-gradient(135deg, #64b5f6, #42a5f5)",
1201
+ button_primary_text_color="white",
1202
+ input_background_fill="rgba(255, 255, 255, 0.1)",
1203
+ input_border_color="rgba(255, 255, 255, 0.3)",
1204
+ body_text_color="white",
1205
+ block_label_text_color="white"
1206
+ ),
1207
+ css=dark_css
1208
+ ) as demo:
1209
+
1210
+ # Hero Section
1211
  with gr.Row():
1212
+ with gr.Column():
1213
+ gr.HTML(f"""
1214
+ <div class="hero-section">
1215
+ <h1 style="font-size: 3rem; font-weight: bold; margin: 0; text-shadow: 2px 2px 4px rgba(0,0,0,0.3);">
1216
+ 🔬 {APP_NAME}
1217
+ </h1>
1218
+ <h2 style="font-size: 1.5rem; margin: 0.5rem 0; opacity: 0.9;">
1219
+ {APP_DESCRIPTION}
1220
+ </h2>
1221
+ <p style="font-size: 1.1rem; margin: 1rem 0; opacity: 0.8;">
1222
+ Powered by Google Gemini AI & Advanced Web Research
1223
+ </p>
1224
+ </div>
1225
+ """)
1226
+
1227
+ # Features Overview
1228
+ with gr.Row():
1229
+ with gr.Column():
1230
+ gr.HTML("""
1231
+ <div class="feature-card">
1232
+ <h3>🎯 What this tool does:</h3>
1233
+ <ul style="margin: 1rem 0;">
1234
+ <li><strong>🔍 Intelligent Search:</strong> Uses DuckDuckGo to find the most relevant sources</li>
1235
+ <li><strong>📊 Content Analysis:</strong> Extracts and processes content from multiple websites</li>
1236
+ <li><strong>🤖 AI Synthesis:</strong> Uses Google Gemini to create comprehensive reports</li>
1237
+ <li><strong>📄 Professional Output:</strong> Generates both Markdown and PDF reports</li>
1238
+ <li><strong>⚡ Fast & Reliable:</strong> Automated research in minutes, not hours</li>
1239
+ </ul>
1240
+ </div>
1241
+ """)
1242
+
1243
+ # Simple API Key Section
1244
+ with gr.Row():
1245
+ with gr.Column():
1246
+ gr.HTML("""
1247
+ <div class="feature-card">
1248
+ <h3>� API Key Setup</h3>
1249
+ <p>Get your free Gemini API key from <a href="https://aistudio.google.com/" target="_blank" style="color: #64b5f6;">Google AI Studio</a></p>
1250
+ </div>
1251
+ """)
1252
 
1253
# API-key entry row: password-style textbox (3/4 width) plus a "Validate"
# button (1/4 width); the button is wired to validate_key_handler below.
with gr.Row():
    with gr.Column(scale=3):
        gemini_key = gr.Textbox(
            label="🔐 Enter your Gemini API Key",
            type="password",  # mask the key in the UI
            placeholder="Paste your API key here...",
            container=True
        )
    with gr.Column(scale=1):
        validate_btn = gr.Button(
            "🔍 Validate",
            variant="secondary",
            size="lg"
        )

# Hidden HTML panel; validate_btn.click fills it with a success/error card.
validation_output = gr.HTML(visible=False)
1269
+
1270
# Main Research Interface
gr.HTML("<h2 style='text-align: center; color: #2c3e50; margin: 2rem 0;'>🔬 Start Your Research</h2>")

# Left column (2/3): topic input + start button. Right column (1/3): tips card.
with gr.Row():
    with gr.Column(scale=2):
        research_topic = gr.Textbox(
            label="🎯 Research Topic",
            placeholder="Enter your research topic here... (e.g., 'Latest developments in quantum computing', 'Climate change solutions 2024', 'AI trends in healthcare')",
            lines=3,
            container=True
        )

        with gr.Row():
            research_btn = gr.Button(
                "🚀 Start Deep Research",
                variant="primary",
                size="lg",
                scale=2
            )
            with gr.Column(scale=1):
                # Empty spacer so the button does not span the full row.
                gr.HTML("<div style='padding: 1rem;'></div>")

    with gr.Column(scale=1):
        gr.HTML("""
        <div class="feature-card">
            <h4>💡 Research Tips:</h4>
            <ul style="font-size: 0.9rem;">
                <li><strong>Be Specific:</strong> "AI in healthcare 2024" vs "AI"</li>
                <li><strong>Include Context:</strong> Add year, location, or specific aspect</li>
                <li><strong>Ask Questions:</strong> "What is the impact of...?"</li>
                <li><strong>Current Events:</strong> Include "latest" or "current"</li>
                <li><strong>Multiple Angles:</strong> "Causes and solutions of..."</li>
            </ul>
            <div style="margin-top: 1rem; padding: 0.8rem; background: rgba(76, 175, 80, 0.1); border-radius: 6px; border-left: 3px solid #4caf50;">
                <strong>📊 Research Power:</strong><br>
                <small>10+ sources • Topic categorization • Authoritative domains • AI synthesis</small>
            </div>
        </div>
        """)
1309
+
1310
# Progress and Results Section
with gr.Row():
    with gr.Column():
        # Hidden progress panel; handlers toggle it via gr.update(visible=...).
        progress_html = gr.HTML(visible=False)

        # Main report area; replaced with the generated Markdown report.
        output = gr.Markdown(
            value="Your comprehensive research report will appear here...",
            label="📊 Research Report",
            container=True,
            height=400
        )

# Download Section
with gr.Row():
    with gr.Column():
        # Hidden wrapper; handlers can reveal it once a report exists.
        download_section = gr.HTML(visible=False)

with gr.Row():
    with gr.Column():
        download_md_btn = gr.DownloadButton(
            "📝 Download Markdown",
            visible=False,  # revealed after a successful research run
            variant="secondary",
            size="lg"
        )
    with gr.Column():
        download_pdf_btn = gr.DownloadButton(
            "📄 Download PDF Report",
            visible=False,  # revealed after a successful research run
            variant="primary",
            size="lg"
        )

# Footer
gr.HTML(f"""
<div style="text-align: center; padding: 2rem; color: #7f8c8d; border-top: 1px solid #ecf0f1; margin-top: 3rem;">
    <p>🔬 <strong>{APP_NAME} {APP_VERSION}</strong> | Advanced AI Research Assistant</p>
    <p>Powered by Google Gemini AI • Built with ❤️ for researchers worldwide</p>
</div>
""")
1350
 
1351
# Event Handlers
def validate_key_handler(api_key):
    """Check the supplied Gemini API key and return an HTML status panel.

    Builds a success or error card (styled by the status-success /
    status-error CSS classes) and returns a single gr.update that makes
    the validation panel visible with that card as its content.
    """
    if not api_key:
        # Nothing entered at all -- prompt for a key.
        html = '<div class="status-error"><h4>❌ API Key Required</h4><p>Please enter your Gemini API key above.</p></div>'
    else:
        is_valid, message = validate_api_key(api_key)
        if is_valid:
            html = f'<div class="status-success"><h4>✅ API Key Valid!</h4><p>{message}</p><p>You\'re ready to start researching!</p></div>'
        else:
            # pre-line preserves any newlines embedded in the error message.
            html = f'<div class="status-error"><h4>❌ API Key Issue</h4><div style="white-space: pre-line;">{message}</div></div>'
    return gr.update(visible=True, value=html)
1370
+
1371
def research_handler(topic, api_key):
    """Validate the inputs, then delegate to run_research.

    The error returns mirror the UI outputs as a 6-tuple:
    (report_markdown, md_file, pdf_file, progress visibility,
    md-button visibility, pdf-button visibility).
    NOTE(review): run_research is assumed to return the same 6-tuple --
    confirm against its definition.
    """
    # Fix: guard against None as well as whitespace-only input
    # (the old code called api_key.strip() unconditionally and would
    # raise AttributeError on None).
    if not api_key or not api_key.strip():
        return (
            "❌ Please enter and validate your Gemini API key first.",
            None, None,
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(visible=False)
        )

    if not topic or not topic.strip():
        return (
            "❌ Please enter a research topic.",
            None, None,
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(visible=False)
        )

    # Fix: the old inline progress_msg HTML was built here but never
    # returned or displayed (dead code) -- removed.
    return run_research(topic, api_key)
1400
 
1401
# Wire up events
validate_btn.click(
    fn=validate_key_handler,
    inputs=[gemini_key],
    outputs=[validation_output]
)


# NOTE(review): this wires run_research directly, so the input checks in
# research_handler above never execute -- confirm which was intended.
# NOTE(review): the outputs list repeats download_md_btn and
# download_pdf_btn; Gradio expects distinct output components, so the
# duplicates look like a bug (perhaps progress_html / download_section
# were meant for the last two slots) -- verify against run_research's
# return arity.
research_btn.click(
    fn=run_research,
    inputs=[research_topic, gemini_key],
    outputs=[output, download_md_btn, download_pdf_btn, download_md_btn, download_pdf_btn]
)
1413
 
1414
# Download handlers
def create_md_file(content):
    """Return the report text for the Markdown download, or a placeholder.

    A None, empty, or whitespace-only report yields "No content available".
    """
    has_text = bool(content) and bool(content.strip())
    return content if has_text else "No content available"
1419
 
1420
def get_pdf_file(pdf_path):
    """Return pdf_path when it names an existing file, otherwise None."""
    if not pdf_path:
        return None
    return pdf_path if os.path.exists(pdf_path) else None
1424
+
1425
# NOTE(review): gr.DownloadButton serves a *file path* as its value, but
# create_md_file returns the raw markdown text; the text likely needs to
# be written to a temporary file first -- verify against the Gradio docs.
download_md_btn.click(
    fn=create_md_file,
    inputs=[output],
    outputs=[download_md_btn]
)

# Re-checks that the PDF path currently held by the button exists on disk.
download_pdf_btn.click(
    fn=get_pdf_file,
    inputs=[download_pdf_btn],
    outputs=[download_pdf_btn]
)
1436
 
1437
  return demo