hari7261 committed on
Commit
015383e
·
verified ·
1 Parent(s): 6cc8618

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +202 -202
app.py CHANGED
@@ -1,203 +1,203 @@
1
- import gradio as gr
2
- import google.generativeai as genai
3
- from duckduckgo_search import DDGS
4
- import requests
5
- from bs4 import BeautifulSoup
6
- import time
7
- from urllib.parse import urlparse
8
- import re
9
- import json
10
- from typing import List, Dict, Any
11
-
12
- # Search the web for relevant information using DuckDuckGo
13
def web_search(query: str, max_results: int = 10) -> List[Dict[str, str]]:
    """Run a DuckDuckGo text search and return the hits as a list.

    Args:
        query: Search phrase forwarded to ``DDGS.text``.
        max_results: Upper bound on hits, forwarded to ``DDGS.text``.

    Returns:
        The materialised results of ``DDGS.text``, or an empty list when
        anything raises during the search (the error is printed).
    """
    hits: List[Dict[str, str]] = []
    try:
        with DDGS() as client:
            hits.extend(client.text(query, max_results=max_results))
    except Exception as exc:
        # Best-effort lookup: report the problem and hand back nothing.
        print(f"Search error: {exc}")
        return []
    return hits
22
-
23
- # Fetch and extract content from a URL
24
def fetch_url_content(url: str, max_chars: int = 5000, timeout: float = 10) -> str:
    """Download a web page and return its visible text with whitespace collapsed.

    Args:
        url: Absolute http(s) URL to fetch.
        max_chars: Maximum number of characters of extracted text to return;
            keeps the text small enough to embed in a model prompt.
        timeout: Timeout in seconds passed to ``requests.get``.

    Returns:
        The extracted text truncated to ``max_chars``. An empty string is
        returned when the URL is empty or not http(s), when the response
        declares a non-textual content type, or when anything raises (the
        error is printed).
    """
    try:
        # Reject empty strings and non-web schemes before touching the network.
        parsed = urlparse(url or "")
        if parsed.scheme not in ("http", "https") or not parsed.netloc:
            print(f"Error fetching {url}: unsupported or malformed URL")
            return ""

        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()

        # Feeding a PDF or image to the HTML parser yields garbage that would
        # still pass the caller's length check, so skip declared non-text
        # payloads. A missing header is given the benefit of the doubt.
        content_type = response.headers.get('Content-Type', '').lower()
        if content_type and 'html' not in content_type and not content_type.startswith('text/'):
            print(f"Error fetching {url}: unsupported content type {content_type}")
            return ""

        soup = BeautifulSoup(response.content, 'html.parser')

        # Remove page chrome and non-content elements
        for element in soup(['script', 'style', 'nav', 'footer', 'header', 'aside']):
            element.decompose()

        # Collapse every run of whitespace (spaces, tabs, newlines) to one space
        text = ' '.join(soup.get_text().split())

        return text[:max_chars]
    except Exception as e:
        print(f"Error fetching {url}: {e}")
        return ""
51
-
52
- # Research function using web search and content extraction
53
def perform_research(query: str, max_sources: int = 5) -> Dict[str, Any]:
    """Search the web for *query* and collect text from the best sources.

    Twice as many search results as needed are requested so that pages that
    fail to download (or yield too little text) can be replaced by later
    results; collection stops once ``max_sources`` usable pages are found.

    Args:
        query: The research topic to search for.
        max_sources: Maximum number of usable sources to collect.

    Returns:
        A dict with:
          * ``'sources'``: list of ``{'title', 'url', 'content'}`` dicts,
          * ``'research_context'``: the sources rendered as numbered text
            sections joined by newlines,
          * ``'query'``: the original query.
    """
    print(f"Researching: {query}")

    # Ask for spare results so failed fetches can be replaced
    search_results = web_search(query, max_results=max_sources * 2)

    sources = []
    content_chunks = []
    attempted = 0

    for result in search_results:
        if len(sources) >= max_sources:
            break

        url = result.get('href', '')
        if not url:
            # A result without a link cannot be fetched or cited.
            continue

        # Be polite: pause between requests, but not before the first one
        # and not after the last one.
        if attempted:
            time.sleep(1)
        attempted += 1

        print(f"Fetching content from {url}")
        content = fetch_url_content(url)

        # Only keep pages that produced a meaningful amount of text
        if len(content) <= 200:
            continue

        sources.append({
            'title': result.get('title', 'No title'),
            'url': url,
            'content': content
        })
        # Number by position among kept sources so the labels have no gaps.
        content_chunks.append(f"SOURCE {len(sources)}:\nURL: {url}\nCONTENT:\n{content}\n")

    research_context = "\n".join(content_chunks)

    return {
        'sources': sources,
        'research_context': research_context,
        'query': query
    }
85
-
86
- # Generate a research report using Gemini
87
def generate_research_report(research_data: Dict[str, Any], gemini_api_key: str,
                             model_name: str = 'gemini-pro') -> str:
    """Ask Gemini to turn collected research into a Markdown report.

    Args:
        research_data: Dict providing ``'query'`` (the topic) and
            ``'research_context'`` (the source text to analyse).
        gemini_api_key: API key passed to ``genai.configure``. Surrounding
            whitespace is ignored; an empty key short-circuits with a message.
        model_name: Name handed to ``genai.GenerativeModel``.

    Returns:
        The generated report text, or a human-readable message when the key
        is missing or when anything raises while building the prompt or
        calling the model.
    """
    # Pasted keys often carry stray whitespace; a blank key is a missing key.
    api_key = (gemini_api_key or "").strip()
    if not api_key:
        return "Gemini API key is required to generate the report."

    try:
        # Initialize Gemini
        genai.configure(api_key=api_key)
        model = genai.GenerativeModel(model_name)

        prompt = f"""
        RESEARCH TOPIC: {research_data['query']}

        RESEARCH CONTEXT FROM VARIOUS SOURCES:
        {research_data['research_context']}

        Please analyze this research and create a comprehensive, well-structured report with:
        1. Key findings and insights
        2. Detailed explanations of complex concepts
        3. Relevant examples and case studies
        4. Real-world applications
        5. Future predictions and trends
        6. Citations for all sources with links

        Format your response using Markdown with appropriate headings, subheadings, bullet points, and bold text for emphasis.
        """

        response = model.generate_content(prompt)
        return response.text
    except Exception as e:
        # Callers display the return value directly, so report instead of raising.
        return f"Error generating report: {str(e)}"
118
-
119
- # Main research function
120
def run_research(topic: str, gemini_api_key: str):
    """Run search, extraction and report generation for one topic.

    Args:
        topic: Free-text research topic entered by the user.
        gemini_api_key: Gemini API key entered by the user.

    Returns:
        A 3-tuple ``(message_or_report, download_value, visibility_update)``:
        the text to display, the path of the saved report file (``None``
        when there is nothing to download), and a ``gr.update`` toggling the
        download button's visibility.
    """
    if not gemini_api_key:
        return "Please enter your Gemini API key.", None, gr.update(visible=False)

    topic = (topic or "").strip()
    if not topic:
        return "Please enter a research topic.", None, gr.update(visible=False)

    try:
        # Perform research
        research_data = perform_research(topic)

        if not research_data['sources']:
            return "No relevant sources found. Please try a different search term.", None, gr.update(visible=False)

        # Generate report
        report = generate_research_report(research_data, gemini_api_key)

        # The download value must point at a real file, so write the report
        # to disk instead of returning a bare file name.
        report_path = _write_report_file(topic, report)

        return report, report_path, gr.update(visible=True)

    except Exception as e:
        error_msg = f"An error occurred: {str(e)}"
        return error_msg, None, gr.update(visible=False)


def _write_report_file(topic: str, report: str) -> str:
    """Save *report* as ``<topic>_report.md`` in a fresh temp dir; return its path."""
    import re
    import tempfile
    from pathlib import Path

    # The topic is user input: keep only word characters and dashes so it
    # cannot contain path separators, and cap the length.
    safe_stem = re.sub(r'[^\w-]+', '_', topic).strip('_')[:80] or 'research'
    target = Path(tempfile.mkdtemp(prefix='research_report_')) / f"{safe_stem}_report.md"
    target.write_text(report, encoding='utf-8')
    return str(target)
146
-
147
- # Gradio interface
148
def create_interface():
    """Build the Gradio Blocks UI for the research agent.

    The layout is an API-key field next to a topic field, a "Start Research"
    button, a Markdown pane for the report and a download button that starts
    hidden.

    Returns:
        The configured ``gr.Blocks`` app (not yet launched).
    """
    with gr.Blocks(title="Gemini Deep Research Agent", theme=gr.themes.Soft()) as demo:
        gr.Markdown("# 📘 Gemini Deep Research Agent")
        gr.Markdown("This agent performs deep research on any topic using Google's Gemini and DuckDuckGo search")

        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("## API Configuration")
                gemini_key = gr.Textbox(
                    label="Gemini API Key",
                    type="password",
                    placeholder="Enter your Gemini API key (get it from https://aistudio.google.com/)"
                )

            with gr.Column(scale=2):
                research_topic = gr.Textbox(
                    label="Research Topic",
                    placeholder="e.g., Latest developments in AI",
                    lines=2
                )

        research_btn = gr.Button("Start Research", variant="primary")

        output = gr.Markdown(
            label="Research Report",
            value="Your research report will appear here..."
        )

        download_btn = gr.DownloadButton(
            "Download Report",
            visible=False
        )

        # run_research returns three values: the report text, the download
        # button's value, and a visibility update for that same button.
        research_btn.click(
            fn=run_research,
            inputs=[research_topic, gemini_key],
            outputs=[output, download_btn, download_btn]
        )

        # No click handler is attached to download_btn. The button downloads
        # whatever value it already holds; the previous handler overwrote
        # that value with the rendered Markdown text, which is neither a
        # file path nor a URL.

    return demo
199
-
200
- # Main execution
201
if __name__ == "__main__":
    # Script entry point: build the Blocks app, then start serving it.
    demo = create_interface()
    demo.launch()
 
1
+ import gradio as gr
2
+ import google.generativeai as genai
3
+ from duckduckgo_search import DDGS
4
+ import requests
5
+ from bs4 import BeautifulSoup
6
+ import time
7
+ from urllib.parse import urlparse
8
+ import re
9
+ import json
10
+ from typing import List, Dict, Any
11
+
12
+ # Search the web for relevant information using DuckDuckGo
13
def web_search(query: str, max_results: int = 10) -> List[Dict[str, str]]:
    """Run a DuckDuckGo text search and return the hits as a list.

    Args:
        query: Search phrase forwarded to ``DDGS.text``.
        max_results: Upper bound on hits, forwarded to ``DDGS.text``.

    Returns:
        The materialised results of ``DDGS.text``, or an empty list when
        anything raises during the search (the error is printed).
    """
    hits: List[Dict[str, str]] = []
    try:
        with DDGS() as client:
            hits.extend(client.text(query, max_results=max_results))
    except Exception as exc:
        # Best-effort lookup: report the problem and hand back nothing.
        print(f"Search error: {exc}")
        return []
    return hits
22
+
23
+ # Fetch and extract content from a URL
24
def fetch_url_content(url: str, max_chars: int = 5000, timeout: float = 10) -> str:
    """Download a web page and return its visible text with whitespace collapsed.

    Args:
        url: Absolute http(s) URL to fetch.
        max_chars: Maximum number of characters of extracted text to return;
            keeps the text small enough to embed in a model prompt.
        timeout: Timeout in seconds passed to ``requests.get``.

    Returns:
        The extracted text truncated to ``max_chars``. An empty string is
        returned when the URL is empty or not http(s), when the response
        declares a non-textual content type, or when anything raises (the
        error is printed).
    """
    try:
        # Reject empty strings and non-web schemes before touching the network.
        parsed = urlparse(url or "")
        if parsed.scheme not in ("http", "https") or not parsed.netloc:
            print(f"Error fetching {url}: unsupported or malformed URL")
            return ""

        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()

        # Feeding a PDF or image to the HTML parser yields garbage that would
        # still pass the caller's length check, so skip declared non-text
        # payloads. A missing header is given the benefit of the doubt.
        content_type = response.headers.get('Content-Type', '').lower()
        if content_type and 'html' not in content_type and not content_type.startswith('text/'):
            print(f"Error fetching {url}: unsupported content type {content_type}")
            return ""

        soup = BeautifulSoup(response.content, 'html.parser')

        # Remove page chrome and non-content elements
        for element in soup(['script', 'style', 'nav', 'footer', 'header', 'aside']):
            element.decompose()

        # Collapse every run of whitespace (spaces, tabs, newlines) to one space
        text = ' '.join(soup.get_text().split())

        return text[:max_chars]
    except Exception as e:
        print(f"Error fetching {url}: {e}")
        return ""
51
+
52
+ # Research function using web search and content extraction
53
def perform_research(query: str, max_sources: int = 5) -> Dict[str, Any]:
    """Search the web for *query* and collect text from the best sources.

    Twice as many search results as needed are requested so that pages that
    fail to download (or yield too little text) can be replaced by later
    results; collection stops once ``max_sources`` usable pages are found.

    Args:
        query: The research topic to search for.
        max_sources: Maximum number of usable sources to collect.

    Returns:
        A dict with:
          * ``'sources'``: list of ``{'title', 'url', 'content'}`` dicts,
          * ``'research_context'``: the sources rendered as numbered text
            sections joined by newlines,
          * ``'query'``: the original query.
    """
    print(f"Researching: {query}")

    # Ask for spare results so failed fetches can be replaced
    search_results = web_search(query, max_results=max_sources * 2)

    sources = []
    content_chunks = []
    attempted = 0

    for result in search_results:
        if len(sources) >= max_sources:
            break

        url = result.get('href', '')
        if not url:
            # A result without a link cannot be fetched or cited.
            continue

        # Be polite: pause between requests, but not before the first one
        # and not after the last one.
        if attempted:
            time.sleep(1)
        attempted += 1

        print(f"Fetching content from {url}")
        content = fetch_url_content(url)

        # Only keep pages that produced a meaningful amount of text
        if len(content) <= 200:
            continue

        sources.append({
            'title': result.get('title', 'No title'),
            'url': url,
            'content': content
        })
        # Number by position among kept sources so the labels have no gaps.
        content_chunks.append(f"SOURCE {len(sources)}:\nURL: {url}\nCONTENT:\n{content}\n")

    research_context = "\n".join(content_chunks)

    return {
        'sources': sources,
        'research_context': research_context,
        'query': query
    }
85
+
86
+ # Generate a research report using Gemini
87
def generate_research_report(research_data: Dict[str, Any], gemini_api_key: str,
                             model_name: str = 'gemini-2.0-flash') -> str:
    """Ask Gemini to turn collected research into a Markdown report.

    Args:
        research_data: Dict providing ``'query'`` (the topic) and
            ``'research_context'`` (the source text to analyse).
        gemini_api_key: API key passed to ``genai.configure``. Surrounding
            whitespace is ignored; an empty key short-circuits with a message.
        model_name: Name handed to ``genai.GenerativeModel``.

    Returns:
        The generated report text, or a human-readable message when the key
        is missing or when anything raises while building the prompt or
        calling the model.
    """
    # Pasted keys often carry stray whitespace; a blank key is a missing key.
    api_key = (gemini_api_key or "").strip()
    if not api_key:
        return "Gemini API key is required to generate the report."

    try:
        # Initialize Gemini
        genai.configure(api_key=api_key)
        model = genai.GenerativeModel(model_name)

        prompt = f"""
        RESEARCH TOPIC: {research_data['query']}

        RESEARCH CONTEXT FROM VARIOUS SOURCES:
        {research_data['research_context']}

        Please analyze this research and create a comprehensive, well-structured report with:
        1. Key findings and insights
        2. Detailed explanations of complex concepts
        3. Relevant examples and case studies
        4. Real-world applications
        5. Future predictions and trends
        6. Citations for all sources with links

        Format your response using Markdown with appropriate headings, subheadings, bullet points, and bold text for emphasis.
        """

        response = model.generate_content(prompt)
        return response.text
    except Exception as e:
        # Callers display the return value directly, so report instead of raising.
        return f"Error generating report: {str(e)}"
118
+
119
+ # Main research function
120
def run_research(topic: str, gemini_api_key: str):
    """Run search, extraction and report generation for one topic.

    Args:
        topic: Free-text research topic entered by the user.
        gemini_api_key: Gemini API key entered by the user.

    Returns:
        A 3-tuple ``(message_or_report, download_value, visibility_update)``:
        the text to display, the path of the saved report file (``None``
        when there is nothing to download), and a ``gr.update`` toggling the
        download button's visibility.
    """
    if not gemini_api_key:
        return "Please enter your Gemini API key.", None, gr.update(visible=False)

    topic = (topic or "").strip()
    if not topic:
        return "Please enter a research topic.", None, gr.update(visible=False)

    try:
        # Perform research
        research_data = perform_research(topic)

        if not research_data['sources']:
            return "No relevant sources found. Please try a different search term.", None, gr.update(visible=False)

        # Generate report
        report = generate_research_report(research_data, gemini_api_key)

        # The download value must point at a real file, so write the report
        # to disk instead of returning a bare file name.
        report_path = _write_report_file(topic, report)

        return report, report_path, gr.update(visible=True)

    except Exception as e:
        error_msg = f"An error occurred: {str(e)}"
        return error_msg, None, gr.update(visible=False)


def _write_report_file(topic: str, report: str) -> str:
    """Save *report* as ``<topic>_report.md`` in a fresh temp dir; return its path."""
    import re
    import tempfile
    from pathlib import Path

    # The topic is user input: keep only word characters and dashes so it
    # cannot contain path separators, and cap the length.
    safe_stem = re.sub(r'[^\w-]+', '_', topic).strip('_')[:80] or 'research'
    target = Path(tempfile.mkdtemp(prefix='research_report_')) / f"{safe_stem}_report.md"
    target.write_text(report, encoding='utf-8')
    return str(target)
146
+
147
+ # Gradio interface
148
def create_interface():
    """Build the Gradio Blocks UI for the research agent.

    The layout is an API-key field next to a topic field, a "Start Research"
    button, a Markdown pane for the report and a download button that starts
    hidden.

    Returns:
        The configured ``gr.Blocks`` app (not yet launched).
    """
    with gr.Blocks(title="Gemini Deep Research Agent", theme=gr.themes.Soft()) as demo:
        gr.Markdown("# 📘 Gemini Deep Research Agent")
        gr.Markdown("This agent performs deep research on any topic using Google's Gemini and DuckDuckGo search")

        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("## API Configuration")
                gemini_key = gr.Textbox(
                    label="Gemini API Key",
                    type="password",
                    placeholder="Enter your Gemini API key (get it from https://aistudio.google.com/)"
                )

            with gr.Column(scale=2):
                research_topic = gr.Textbox(
                    label="Research Topic",
                    placeholder="e.g., Latest developments in AI",
                    lines=2
                )

        research_btn = gr.Button("Start Research", variant="primary")

        output = gr.Markdown(
            label="Research Report",
            value="Your research report will appear here..."
        )

        download_btn = gr.DownloadButton(
            "Download Report",
            visible=False
        )

        # run_research returns three values: the report text, the download
        # button's value, and a visibility update for that same button.
        research_btn.click(
            fn=run_research,
            inputs=[research_topic, gemini_key],
            outputs=[output, download_btn, download_btn]
        )

        # No click handler is attached to download_btn. The button downloads
        # whatever value it already holds; the previous handler overwrote
        # that value with the rendered Markdown text, which is neither a
        # file path nor a URL.

    return demo
199
+
200
+ # Main execution
201
if __name__ == "__main__":
    # Script entry point: build the Blocks app, then start serving it.
    demo = create_interface()
    demo.launch()