Soham Waghmare committed on
Commit
54a7d14
·
1 Parent(s): ceae8b0

feat: improve scraping and context handling

Browse files
backend/.vscode/launch.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ // Use IntelliSense to learn about possible attributes.
3
+ // Hover to view descriptions of existing attributes.
4
+ // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
5
+ "version": "0.2.0",
6
+ "configurations": [
7
+ {
8
+ // Fastapi
9
+ "name": "Launch FastAPI",
10
+ "type": "debugpy",
11
+ "request": "launch",
12
+ "program": "${workspaceFolder}/app.py",
13
+ "args": ["run", "--reload"],
14
+ "justMyCode": true
15
+ },
16
+ {
17
+ "name": "Python Debugger: Current File",
18
+ "type": "debugpy",
19
+ "request": "launch",
20
+ "program": "${file}",
21
+ "console": "integratedTerminal",
22
+ "justMyCode": true
23
+ }
24
+ ]
25
+ }
backend/app.py CHANGED
@@ -15,12 +15,12 @@ logger = logging.getLogger(__name__)
15
 
16
  app = FastAPI()
17
  # Increased pingTimeout and added logger
18
- sio = socketio.AsyncServer(cors_allowed_origins="*", ping_timeout=60, async_mode="asgi")
19
  app.mount('/', socketio.ASGIApp(sio))
20
 
21
  # Initialize the scraper and KNet
22
- # scraper_instance = CrawlForAIScraper()
23
- scraper_instance = WebScraper()
24
  knet = KNet(scraper_instance)
25
 
26
 
@@ -50,20 +50,14 @@ async def start_research(sid, data):
50
 
51
  async def progress_callback(status):
52
  try:
53
- logger.debug(
54
- f"Progress update: {status['progress']}% - {status['message']}"
55
- )
56
- await sio.emit(
57
- "status",
58
- {"message": status["message"], "progress": status["progress"]},
59
- room=session_id,
60
- )
61
  except Exception as e:
62
  logger.error(f"Error in progress callback: {str(e)}")
63
  raise e
64
 
65
  try:
66
- research_results = knet.conduct_research(topic, progress_callback)
67
  logger.info(f"Research completed for topic: {topic}")
68
  await sio.emit("research_complete", research_results, room=session_id)
69
  except Exception as e:
 
15
 
16
  app = FastAPI()
17
  # Increased pingTimeout and added logger
18
+ sio = socketio.AsyncServer(cors_allowed_origins="*", ping_timeout=60, ping_interval=10, async_mode="asgi")
19
  app.mount('/', socketio.ASGIApp(sio))
20
 
21
  # Initialize the scraper and KNet
22
+ scraper_instance = CrawlForAIScraper()
23
+ # scraper_instance = WebScraper()
24
  knet = KNet(scraper_instance)
25
 
26
 
 
50
 
51
  async def progress_callback(status):
52
  try:
53
+ logger.debug(f"Progress update: {status['progress']}% - {status['message']}")
54
+ await sio.emit("status", {"message": status["message"], "progress": status["progress"]}, room=session_id)
 
 
 
 
 
 
55
  except Exception as e:
56
  logger.error(f"Error in progress callback: {str(e)}")
57
  raise e
58
 
59
  try:
60
+ research_results = await knet.conduct_research(topic, progress_callback)
61
  logger.info(f"Research completed for topic: {topic}")
62
  await sio.emit("research_complete", research_results, room=session_id)
63
  except Exception as e:
backend/crawl_ai.py CHANGED
@@ -15,9 +15,10 @@ async def main():
15
  # Create an instance of AsyncWebCrawler
16
  async with AsyncWebCrawler(config=base_browser) as crawler:
17
  # Run the crawler on a URL
18
- result = await crawler.arun(url=sys.argv[1], screenshot=False, cache_mode=CacheMode.BYPASS, wait_for="js:() => window.loaded === true")
19
  # Print the extracted content
20
  hr = lambda: print(("-" * 80) * 2)
 
21
  print(result.markdown)
22
  hr()
23
  print(json.dumps(result.media, indent=2))
@@ -25,6 +26,7 @@ async def main():
25
  print(json.dumps(result.links, indent=2))
26
  hr()
27
  print(json.dumps(result.downloaded_files, indent=2))
 
28
 
29
  # if result.success:
30
  # # Save screenshot
 
15
  # Create an instance of AsyncWebCrawler
16
  async with AsyncWebCrawler(config=base_browser) as crawler:
17
  # Run the crawler on a URL
18
+ result = await crawler.arun(url=sys.argv[1], screenshot=False, cache_mode=CacheMode.BYPASS)
19
  # Print the extracted content
20
  hr = lambda: print(("-" * 80) * 2)
21
+ hr()
22
  print(result.markdown)
23
  hr()
24
  print(json.dumps(result.media, indent=2))
 
26
  print(json.dumps(result.links, indent=2))
27
  hr()
28
  print(json.dumps(result.downloaded_files, indent=2))
29
+ hr()
30
 
31
  # if result.success:
32
  # # Save screenshot
backend/knet.py CHANGED
@@ -6,7 +6,6 @@ import json
6
  import os
7
  from datetime import datetime
8
  from dotenv import load_dotenv
9
- from scraper import WebScraper, CrawlForAIScraper
10
  from research_node import ResearchNode
11
  from collections import deque
12
  import asyncio
@@ -20,10 +19,12 @@ class ResearchProgress:
20
  self.progress = 0
21
  self.callback = callback
22
 
23
- def update(self, progress: int, message: str):
24
- self.progress = progress
 
 
25
  if self.callback:
26
- self.callback({"progress": progress, "message": message})
27
 
28
 
29
  class KNet:
@@ -39,19 +40,20 @@ class KNet:
39
  "gemini-2.0-flash-lite-preview-02-05",
40
  generation_config={"temperature": 0.7},
41
  )
 
42
 
43
  self.research_manager = genai.GenerativeModel(
44
  "gemini-2.0-flash-lite-preview-02-05",
45
  generation_config={"temperature": 0.3},
46
  )
 
47
 
48
  # Initialize scraper
49
  self.scraper = scraper_instance
50
 
51
  self.logger = logging.getLogger(__name__)
52
- self.max_depth = 3
53
  self.max_breadth = 3
54
- self.min_importance_score = 0.6
55
 
56
  self.search_prompt = """Generate 3-5 specific search queries to research the following topic: {topic}
57
 
@@ -68,7 +70,8 @@ class KNet:
68
  Current Topic: {query}
69
  Current Depth: {depth}
70
  Path from Root: {path}
71
- Key Findings: {findings}
 
72
 
73
  Consider:
74
  1. Relevance to main topic
@@ -90,7 +93,7 @@ class KNet:
90
  "response_mime_type": "application/json",
91
  }
92
 
93
- # Analysis schema without reason
94
  self.analysis_schema = {
95
  "response_schema": content.Schema(
96
  type=content.Type.OBJECT,
@@ -115,35 +118,32 @@ class KNet:
115
  def _track_tokens(self, tokens: int) -> None:
116
  self.token_count += tokens
117
 
118
- def _should_branch_deeper(self, node: ResearchNode) -> bool:
119
- findings = ""
120
  if node.data:
121
- findings = "\n".join(
122
- [
123
- f"- {d.get('title', 'Untitled')}: {d.get('summary', '')}"
124
- for d in node.data[:3]
125
- if d
126
- ]
127
- )
128
 
 
129
  prompt = self.branch_decision_prompt.format(
130
  query=node.query,
131
  depth=node.depth,
132
  path=" -> ".join(node.get_path_to_root()),
133
- findings=findings,
134
  )
135
-
136
  response = self.research_manager.generate_content(
137
  prompt, generation_config={**self.branch_schema}
138
  )
139
  self._track_tokens(response.usage_metadata.total_token_count)
140
-
141
  result = json.loads(response.text)
142
  self.logger.info(f"Branch decision for '{node.query}': {result['decision']}")
143
 
144
  return result["decision"]
145
 
146
- def conduct_research(self, topic: str, progress_callback=None) -> Dict[str, Any]:
147
  self.token_count = 0
148
  progress = ResearchProgress(progress_callback)
149
  self.logger.info(f"Starting research on topic: {topic}")
@@ -151,88 +151,79 @@ class KNet:
151
  try:
152
  root_node = ResearchNode(topic)
153
  to_explore = deque([(root_node, 0)]) # (node, depth) pairs
154
- explored_queries = set()
155
- max_branches = self.max_breadth
156
 
157
- progress.update(10, "Starting research...")
158
 
159
- while to_explore and len(explored_queries) < max_branches:
160
  current_node, current_depth = to_explore.popleft()
161
 
162
- if current_node.query in explored_queries or current_depth >= self.max_depth:
163
  continue
164
 
165
  self.logger.info(f"Exploring: {current_node.query} (Depth: {current_depth})")
166
- progress.update(30 + int(len(explored_queries) * 50 / max_branches), f"Exploring: {current_node.query}")
167
 
168
  # Search and scrape
169
- current_node.data = self.scraper.search_and_scrape(current_node.query)
 
170
  explored_queries.add(current_node.query)
171
 
172
  # Only branch if we have data and haven't reached max depth
173
  if current_node.data and current_depth < self.max_depth:
174
- if self._should_branch_deeper(current_node):
175
- new_branches = self._analyze_and_branch(current_node)
176
  for branch in new_branches:
177
  to_explore.append((branch, current_depth + 1))
178
- self.logger.info(f"Added {len(new_branches)} new branches at depth {current_depth + 1}")
179
 
180
  # Generate final report
181
- progress.update(80, "Generating comprehensive report...")
182
  final_report = self._generate_final_report(root_node)
183
- final_report["metadata"]["total_tokens"] = self.token_count
184
 
185
- self.logger.info(
186
- f"Research completed. Explored {len(explored_queries)} queries across {root_node.depth + 1} levels"
187
- )
188
- progress.update(100, "Research complete!")
189
 
 
 
190
  return final_report
191
 
192
  except Exception as e:
193
  self.logger.error(f"Research failed: {str(e)}")
194
  raise e
195
 
196
- def _analyze_and_branch(self, node: ResearchNode) -> List[ResearchNode]:
197
  if not node.data:
198
  return []
199
 
200
- findings = "\n".join([
201
- f"- {d.get('title', 'Untitled')}: {d.get('summary', d.get('text', '')[:200])}"
202
- for d in node.data[:3] if d
203
- ])
204
-
205
- analysis_prompt = f"""Based on the following findings about "{node.query}", suggest new research directions.
206
-
207
  Findings:
208
- {findings}
209
 
210
- Suggest up to 3 specific research queries that:
211
- 1. Build upon these findings
212
- 2. Explore different aspects
213
- 3. Go deeper into important details
214
 
215
- Return as JSON array of objects with only:
216
- - importance (0.0-1.0)
217
  - query (string)"""
218
 
219
  try:
220
  response = self.research_manager.generate_content(
221
- analysis_prompt,
222
- generation_config={**self.analysis_schema},
223
  )
224
  self._track_tokens(response.usage_metadata.total_token_count)
225
-
226
  result = json.loads(response.text)
227
  self.logger.info(f"New branches for '{node.query}': {result['branches']}")
228
 
 
 
 
 
229
  new_nodes = []
230
  for branch in result.get("branches", []):
231
- if branch["importance"] >= self.min_importance_score:
232
- child_node = node.add_child(branch["query"])
233
- child_node.importance_score = branch["importance"]
234
- new_nodes.append(child_node)
235
-
236
  return new_nodes
237
 
238
  except Exception as e:
@@ -240,78 +231,35 @@ class KNet:
240
  return []
241
 
242
  def _generate_final_report(self, root_node: ResearchNode) -> Dict[str, Any]:
243
- def collect_data(node: ResearchNode) -> List[Dict]:
244
- all_data = []
245
- if node.data:
246
- all_data.extend(node.data)
247
- for child in node.children:
248
- all_data.extend(collect_data(child))
249
- return all_data
250
-
251
- all_research_data = collect_data(root_node)
252
-
253
- # Generate part 1 of the report
254
- part1_prompt = f"""Generate part 1 of a research report focusing on overview and key findings.
255
- Main Topic: {root_node.query}
256
-
257
- Structure for Part 1:
258
- 1. Executive Summary (brief overview)
259
- 2. Key Findings (main discoveries and insights)
260
-
261
- Keep it concise and focused. Part 2 will cover detailed analysis and references."""
262
-
263
- response1 = self.research_manager.generate_content(part1_prompt)
264
- self._track_tokens(response1.usage_metadata.total_token_count)
265
- part1_content = response1.text
266
-
267
- # Generate part 2 with awareness of part 1
268
- part2_prompt = f"""Generate part 2 of the research report. Here's part 1 for context:
269
-
270
- {part1_content}
271
-
272
- Now continue with:
273
- 1. Detailed Analysis (expand on the key findings)
274
- 2. Related Topics and Branches (explore connections)
275
- 3. Sources and References (cite sources)
276
-
277
- Focus on details that complement part 1 without repeating the same information."""
278
-
279
- response2 = self.research_manager.generate_content(part2_prompt)
280
- self._track_tokens(response2.usage_metadata.total_token_count)
281
-
282
- # Combine reports with clear section separation
283
- report_content = f"""# Research Report: {root_node.query}
284
-
285
- Part 1: Overview and Key Findings
286
- --------------------------------
287
- {part1_content}
288
-
289
- Part 2: Detailed Analysis and References
290
- --------------------------------------
291
- {response2.text}"""
292
 
293
- # Organize multimedia content
294
  media_content = {"images": [], "videos": [], "links": [], "references": []}
295
-
296
- for data in all_research_data:
297
  if data.get("images"):
298
  media_content["images"].extend(data["images"])
299
  if data.get("videos"):
300
  media_content["videos"].extend(data["videos"])
301
  if data.get("links"):
302
- media_content["links"].append(
303
- {
304
- "url": data["url"],
305
- "title": data.get("title", ""),
306
- "summary": data.get("summary", ""),
307
- }
308
- )
309
 
310
  # Build research tree structure
311
  def build_tree_structure(node: ResearchNode) -> Dict:
 
 
312
  return {
313
  "query": node.query,
314
- "importance": node.importance_score,
315
  "depth": node.depth,
316
  "children": [build_tree_structure(child) for child in node.children],
317
  }
@@ -319,11 +267,12 @@ Part 2: Detailed Analysis and References
319
  return {
320
  "topic": root_node.query,
321
  "timestamp": datetime.now().isoformat(),
322
- "content": report_content,
323
  "media": media_content,
324
  "research_tree": build_tree_structure(root_node),
325
  "metadata": {
326
- "total_sources": root_node.total_children(),
 
327
  "max_depth_reached": root_node.max_depth(),
328
  "total_tokens": self.token_count,
329
  },
 
6
  import os
7
  from datetime import datetime
8
  from dotenv import load_dotenv
 
9
  from research_node import ResearchNode
10
  from collections import deque
11
  import asyncio
 
19
  self.progress = 0
20
  self.callback = callback
21
 
22
+ async def update(self, progress: int, message: str):
23
+ self.progress += progress
24
+ if self.progress > 100:
25
+ self.progress = 100
26
  if self.callback:
27
+ await self.callback({"progress": self.progress, "message": message})
28
 
29
 
30
  class KNet:
 
40
  "gemini-2.0-flash-lite-preview-02-05",
41
  generation_config={"temperature": 0.7},
42
  )
43
+ self.ctx_researcher = []
44
 
45
  self.research_manager = genai.GenerativeModel(
46
  "gemini-2.0-flash-lite-preview-02-05",
47
  generation_config={"temperature": 0.3},
48
  )
49
+ self.ctx_manager = []
50
 
51
  # Initialize scraper
52
  self.scraper = scraper_instance
53
 
54
  self.logger = logging.getLogger(__name__)
55
+ self.max_depth = 2
56
  self.max_breadth = 3
 
57
 
58
  self.search_prompt = """Generate 3-5 specific search queries to research the following topic: {topic}
59
 
 
70
  Current Topic: {query}
71
  Current Depth: {depth}
72
  Path from Root: {path}
73
+ Key Findings:
74
+ {findings}
75
 
76
  Consider:
77
  1. Relevance to main topic
 
93
  "response_mime_type": "application/json",
94
  }
95
 
96
+ # Analysis schema
97
  self.analysis_schema = {
98
  "response_schema": content.Schema(
99
  type=content.Type.OBJECT,
 
118
  def _track_tokens(self, tokens: int) -> None:
119
  self.token_count += tokens
120
 
121
+ def _should_branch_deeper(self, node: ResearchNode, topic: str) -> bool:
122
+ # Generate summary of key findings into research_manager's context
123
  if node.data:
124
+ findings = ("\n" + "-"*10 + "Next data" + "-"*10 + "\n").join([json.dumps(d, indent=2) for d in node.data])
125
+ response = self.llm.generate_content(f"Extract key findings from the following data related to the topic '{topic}':\n{findings}")
126
+ self._track_tokens(response.usage_metadata.total_token_count)
127
+ findings = response.text
128
+ self.ctx_manager.append(findings)
 
 
129
 
130
+ # Research manager takes decision to proceed or not
131
  prompt = self.branch_decision_prompt.format(
132
  query=node.query,
133
  depth=node.depth,
134
  path=" -> ".join(node.get_path_to_root()),
135
+ findings="\n".join(self.ctx_manager),
136
  )
 
137
  response = self.research_manager.generate_content(
138
  prompt, generation_config={**self.branch_schema}
139
  )
140
  self._track_tokens(response.usage_metadata.total_token_count)
 
141
  result = json.loads(response.text)
142
  self.logger.info(f"Branch decision for '{node.query}': {result['decision']}")
143
 
144
  return result["decision"]
145
 
146
+ async def conduct_research(self, topic: str, progress_callback=None) -> Dict[str, Any]:
147
  self.token_count = 0
148
  progress = ResearchProgress(progress_callback)
149
  self.logger.info(f"Starting research on topic: {topic}")
 
151
  try:
152
  root_node = ResearchNode(topic)
153
  to_explore = deque([(root_node, 0)]) # (node, depth) pairs
154
+ explored_queries = set() # {string, string, ...}
 
155
 
156
+ await progress.update(5, "Starting research...")
157
 
158
+ while to_explore:
159
  current_node, current_depth = to_explore.popleft()
160
 
161
+ if (current_node.query in explored_queries or current_depth >= self.max_depth):
162
  continue
163
 
164
  self.logger.info(f"Exploring: {current_node.query} (Depth: {current_depth})")
165
+ await progress.update(5, f"Exploring: {current_node.query}")
166
 
167
  # Search and scrape
168
+ current_node.data = await self.scraper.search_and_scrape(current_node.query, 3) # node -> data = [{url:...}, {url:...}, ...]
169
+ self.ctx_researcher.append(json.dumps(current_node.data, indent=2))
170
  explored_queries.add(current_node.query)
171
 
172
  # Only branch if we have data and haven't reached max depth
173
  if current_node.data and current_depth < self.max_depth:
174
+ if self._should_branch_deeper(current_node, topic):
175
+ new_branches = self._analyze_and_branch(current_node, topic)
176
  for branch in new_branches:
177
  to_explore.append((branch, current_depth + 1))
178
+ self.logger.info(f"Added {len(new_branches)} new branch(es) at depth {current_depth + 1}")
179
 
180
  # Generate final report
181
+ await progress.update(30, "Generating comprehensive report...")
182
  final_report = self._generate_final_report(root_node)
 
183
 
184
+ self.logger.info(f"Research completed. Explored {len(explored_queries)} queries across {root_node.max_depth()} levels")
185
+ await progress.update(100, "Research complete!")
 
 
186
 
187
+ with open("output.json", "w") as f:
188
+ json.dump(final_report, f, indent=2)
189
  return final_report
190
 
191
  except Exception as e:
192
  self.logger.error(f"Research failed: {str(e)}")
193
  raise e
194
 
195
+ def _analyze_and_branch(self, node: ResearchNode, topic: str) -> List[ResearchNode]:
196
  if not node.data:
197
  return []
198
 
199
+ analysis_prompt = f"""Based on the following findings about "{topic}", suggest new research directions.
 
 
 
 
 
 
200
  Findings:
201
+ {json.dumps(self.ctx_manager, indent=2)}
202
 
203
+ Suggest up to {self.max_breadth} specific google search queries that would help data which:
204
+ - Builds upon these findings
205
+ - Explores different aspects
206
+ - Goes deeper into important details
207
 
208
+ Return as JSON array of objects with properties:
 
209
  - query (string)"""
210
 
211
  try:
212
  response = self.research_manager.generate_content(
213
+ analysis_prompt, generation_config={**self.analysis_schema}
 
214
  )
215
  self._track_tokens(response.usage_metadata.total_token_count)
 
216
  result = json.loads(response.text)
217
  self.logger.info(f"New branches for '{node.query}': {result['branches']}")
218
 
219
+ # Add children to current node
220
+ # +> child1
221
+ # node - +> child2
222
+ # +> child3
223
  new_nodes = []
224
  for branch in result.get("branches", []):
225
+ child_node = node.add_child(branch["query"])
226
+ new_nodes.append(child_node)
 
 
 
227
  return new_nodes
228
 
229
  except Exception as e:
 
231
  return []
232
 
233
  def _generate_final_report(self, root_node: ResearchNode) -> Dict[str, Any]:
234
+ findings = "\n".join(self.ctx_researcher)
235
+ prompt = f"""Generate a comprehensive report on the topic "{root_node.query}" based on the following research findings:
236
+ {findings}
237
+ """
238
+ response = self.research_manager.generate_content(prompt)
239
+ self._track_tokens(response.usage_metadata.total_token_count)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
240
 
241
+ # Collate multimedia content
242
  media_content = {"images": [], "videos": [], "links": [], "references": []}
243
+ all_sources_data = root_node.get_all_data()
244
+ for data in all_sources_data:
245
  if data.get("images"):
246
  media_content["images"].extend(data["images"])
247
  if data.get("videos"):
248
  media_content["videos"].extend(data["videos"])
249
  if data.get("links"):
250
+ media_content["links"].extend([{"url": l["href"], "text": l["text"]} for l in data["links"]])
251
+ # Deduplicate
252
+ media_content["images"] = list(set(media_content["images"]))
253
+ media_content["videos"] = list(set(media_content["videos"]))
254
+ media_content["links"] = list({json.dumps(d, sort_keys=True) for d in media_content["links"]})
255
+ media_content["links"] = [json.loads(d) for d in media_content["links"]]
 
256
 
257
  # Build research tree structure
258
  def build_tree_structure(node: ResearchNode) -> Dict:
259
+ if not node:
260
+ return {}
261
  return {
262
  "query": node.query,
 
263
  "depth": node.depth,
264
  "children": [build_tree_structure(child) for child in node.children],
265
  }
 
267
  return {
268
  "topic": root_node.query,
269
  "timestamp": datetime.now().isoformat(),
270
+ "content": response.text,
271
  "media": media_content,
272
  "research_tree": build_tree_structure(root_node),
273
  "metadata": {
274
+ "total_queries": root_node.total_children(),
275
+ "total_sources": len(all_sources_data),
276
  "max_depth_reached": root_node.max_depth(),
277
  "total_tokens": self.token_count,
278
  },
backend/output.json CHANGED
@@ -1,330 +1,652 @@
1
  {
2
- "topic": "importance of sleep in our life",
3
- "timestamp": "2025-02-24T20:15:01.222925",
4
- "content": "# Research Report: importance of sleep in our life
5
 
6
- Part 1: Overview and Key Findings
7
- --------------------------------
8
- ## Part 1: Sleep and Its Importance - Overview and Key Findings
9
 
10
- **1. Executive Summary**
11
 
12
- This research report investigates the critical role of sleep in human health and well-being. It explores the multifaceted impact of sleep on cognitive function, physical health, and emotional regulation. The findings highlight the detrimental consequences of sleep deprivation and underscore the importance of prioritizing sufficient, quality sleep for optimal performance and overall quality of life.
13
 
14
- **2. Key Findings**
15
 
16
- * **Cognitive Performance:** Adequate sleep is directly correlated with improved cognitive functions, including memory consolidation, attention span, decision-making, and problem-solving abilities. Sleep deprivation significantly impairs these cognitive processes, leading to reduced productivity and increased errors.
17
 
18
- * **Physical Health:** Sufficient sleep plays a vital role in regulating physiological processes, such as immune function, hormonal balance, and cardiovascular health. Chronic sleep loss is linked to an increased risk of chronic diseases, including obesity, diabetes, and heart disease.
19
 
20
- * **Emotional Well-being:** Sleep profoundly impacts emotional regulation. Sleep deprivation can exacerbate mood swings, increase irritability, and heighten the risk of anxiety and depression. Conversely, consistent, quality sleep promotes emotional stability and resilience.
 
 
 
21
 
22
- * **Impact of Sleep Disorders:** The prevalence of sleep disorders, such as insomnia and sleep apnea, negatively affects overall health and well-being. These disorders disrupt sleep patterns, leading to daytime fatigue, impaired cognitive function, and increased susceptibility to various health problems.
23
 
 
24
 
25
- Part 2: Detailed Analysis and References
26
- --------------------------------------
27
- ## Part 2: Sleep and Its Importance - Detailed Analysis, Related Topics, and Sources
28
 
29
- **1. Detailed Analysis**
30
 
31
- This section delves deeper into the key findings outlined in Part 1, providing a more comprehensive understanding of the mechanisms and implications of sleep on various aspects of human health.
32
 
33
- * **1.1 Cognitive Performance: The Neural Symphony of Sleep**
 
 
34
 
35
- Beyond the general correlation, the impact of sleep on cognitive function is rooted in specific neural processes. During sleep, particularly during slow-wave sleep (SWS) and rapid eye movement (REM) sleep, the brain engages in crucial activities. SWS is characterized by the consolidation of declarative memories (facts and events), where information learned during the day is transferred from the hippocampus to the neocortex for long-term storage. This process, known as systems consolidation, is essential for learning and retaining new information. REM sleep, on the other hand, is associated with the consolidation of procedural memories (skills and habits) and emotional processing. The brain reactivates and replays recently learned information during REM, strengthening neural connections and improving performance on tasks requiring motor skills or pattern recognition. Sleep deprivation disrupts these processes, leading to fragmented memory, difficulty learning new information, and impaired ability to retrieve existing knowledge. Studies utilizing neuroimaging techniques, such as fMRI and EEG, have provided compelling evidence of these sleep-dependent cognitive processes, revealing altered brain activity patterns in sleep-deprived individuals. Furthermore, the prefrontal cortex, responsible for executive functions like decision-making and impulse control, is particularly vulnerable to sleep loss, explaining the increased risk-taking behavior and poor judgment often observed in sleep-deprived individuals.
36
 
37
- * **1.2 Physical Health: A Restorative Reset for the Body**
38
 
39
- The restorative function of sleep extends to the cellular level, impacting various physiological systems. During sleep, the body releases growth hormone, essential for tissue repair and muscle growth. The immune system also benefits significantly from sleep. Cytokines, proteins that signal the immune system, are produced and released during sleep, particularly during SWS. These cytokines help fight infection and inflammation. Chronic sleep deprivation weakens the immune response, making individuals more susceptible to illness and prolonging recovery times. Furthermore, sleep plays a critical role in metabolic regulation. Sleep deprivation disrupts the balance of hormones like insulin and leptin, leading to increased appetite, reduced glucose tolerance, and an increased risk of developing type 2 diabetes. Cardiovascular health is also profoundly affected. Sleep allows the cardiovascular system to rest and recover. Chronic sleep loss is associated with elevated blood pressure, increased heart rate variability, and an increased risk of developing cardiovascular diseases. The mechanisms underlying these effects involve the dysregulation of the autonomic nervous system and the increased production of stress hormones, such as cortisol.
 
 
40
 
41
- * **1.3 Emotional Well-being: The Emotional Regulator in the Night**
42
 
43
- The impact of sleep on emotional regulation is multifaceted and deeply intertwined with the brain's emotional centers. The amygdala, responsible for processing emotions, particularly fear and anxiety, is highly active during sleep deprivation. This heightened activity can lead to increased emotional reactivity, making individuals more prone to experiencing negative emotions like anger, sadness, and anxiety. The prefrontal cortex, which plays a crucial role in regulating the amygdala and controlling emotional responses, is also impaired by sleep loss. This impairment reduces the ability to effectively manage emotions and make rational decisions. Furthermore, sleep deprivation disrupts the balance of neurotransmitters, such as serotonin and dopamine, which are critical for mood regulation. Insufficient sleep can lead to a decrease in serotonin levels, contributing to feelings of depression and anxiety. Conversely, adequate sleep promotes emotional stability and resilience. Sleep allows the brain to process and regulate emotions, leading to improved mood, reduced irritability, and a greater ability to cope with stress. Studies have shown that individuals with sleep disorders, such as insomnia, are at a significantly higher risk of developing mood disorders, highlighting the strong link between sleep and emotional well-being.
44
 
45
- * **1.4 Impact of Sleep Disorders: The Silent Disruptors**
 
 
 
 
 
46
 
47
- Sleep disorders represent a significant public health concern, affecting millions worldwide. Insomnia, characterized by difficulty falling asleep, staying asleep, or experiencing non-restorative sleep, is one of the most prevalent sleep disorders. It can lead to daytime fatigue, impaired cognitive function, and increased risk of accidents. Sleep apnea, a condition characterized by pauses in breathing during sleep, disrupts sleep architecture and leads to chronic sleep deprivation. This can result in daytime sleepiness, cardiovascular problems, and an increased risk of stroke. Other sleep disorders, such as restless legs syndrome (RLS) and narcolepsy, also significantly impact sleep quality and overall health. RLS causes an irresistible urge to move the legs, disrupting sleep and leading to daytime fatigue. Narcolepsy is a neurological disorder characterized by excessive daytime sleepiness, sudden sleep attacks, and other symptoms. Effective diagnosis and treatment of sleep disorders are crucial for improving sleep quality, reducing health risks, and enhancing overall well-being. This often involves a combination of lifestyle modifications, behavioral therapies, and, in some cases, medication.
48
 
49
- **2. Related Topics and Branches**
50
 
51
- This section explores related topics and branches of research that expand upon the core themes of sleep and its importance.
52
 
53
- * **2.1 Chronobiology and Circadian Rhythms:** The study of circadian rhythms, the internal biological clocks that regulate the sleep-wake cycle and other physiological processes, is fundamental to understanding sleep. Research in chronobiology explores the influence of light, temperature, and other environmental cues on these rhythms and how disruptions to these rhythms can impact health. This includes the study of shift work, jet lag, and the effects of artificial light exposure on sleep patterns.
54
 
55
- * **2.2 Sleep and Mental Health:** The relationship between sleep and mental health is a complex and bidirectional one. Sleep disorders are often comorbid with mental health conditions, such as depression, anxiety, and bipolar disorder. Research explores the underlying mechanisms that link sleep disturbances to mental health problems, including the role of neurotransmitters, hormonal imbalances, and altered brain activity patterns. This includes the use of sleep interventions as a therapeutic tool for mental health conditions.
56
 
57
- * **2.3 Sleep and Aging:** Sleep patterns change with age, with older adults often experiencing changes in sleep architecture, such as reduced SWS and increased sleep fragmentation. Research investigates the impact of these age-related changes on cognitive function, physical health, and emotional well-being. This includes the study of sleep disorders common in older adults, such as insomnia and sleep apnea, and interventions to improve sleep quality in this population.
58
 
59
- * **2.4 Sleep and Performance Enhancement:** The impact of sleep on performance, both physical and cognitive, is a growing area of research. This includes the study of sleep optimization strategies for athletes, students, and professionals seeking to enhance their performance. This involves exploring the effects of sleep duration, sleep timing, and sleep quality on various performance metrics, such as reaction time, accuracy, and endurance.
60
 
61
- * **2.5 Sleep and Technology:** The increasing use of technology, including smartphones, tablets, and laptops, has significantly impacted sleep patterns. Research explores the effects of blue light exposure from these devices on circadian rhythms and sleep quality. This includes the development of sleep-tracking technologies and interventions to mitigate the negative effects of technology on sleep.
62
-
63
- **3. Sources and References**
64
-
65
- This section provides a list of sources and references used in the research report.
66
-
67
- * **Carskadon, M. A., & Dement, W. C. (2011). Normal human sleep: An overview. *Principles and practice of sleep medicine*, 5, 16-26.** (Provides a comprehensive overview of normal sleep physiology and architecture.)
68
-
69
- * **Walker, M. P. (2009). The role of sleep in cognition and emotion. *Annals of the New York Academy of Sciences*, *1156*, 168-197.** (Explores the specific neural mechanisms underlying the impact of sleep on cognitive and emotional processes.)
70
-
71
- * **Gangwisch, J. E. (2009). Sleep and metabolic function. *Sleep*, *32*(8), 981-988.** (Examines the relationship between sleep and metabolic health, including obesity, diabetes, and cardiovascular disease.)
72
-
73
- * **Riemann, D., Baglioni, C., Bassetti, C., Bjorvatn, B., Bonnet, M. H., & Espie, C. A. (2010). European guideline for the diagnosis and treatment of insomnia. *Journal of Sleep Research*, *19*(2), 137-160.** (Provides guidelines for the diagnosis and treatment of insomnia.)
74
-
75
- * **National Institutes of Health (NIH). (2023). *Sleep Disorders*. Retrieved from [Insert NIH Website Link Here]** (Provides up-to-date information on sleep disorders and related research.)
76
-
77
- * **American Academy of Sleep Medicine (AASM). (2023). *Sleep Education*. Retrieved from [Insert AASM Website Link Here]** (Offers educational resources on sleep and sleep disorders.)
78
-
79
- * **Czeisler, C. A., Duffy, J. F., Shanahan, T. L., Brown, E. N., Rimmer, D. W., Ronda, J. M., ... & Kronauer, R. E. (1999). Stability, precision, and near-24-hour period of the human circadian pacemaker. *Science*, *284*(5423), 1329-1332.** (Provides research on the human circadian pacemaker.)
80
-
81
- * **Goel, N., Rao, H., Durkin, P. R., & D'Ambrosio, D. (2013). Sleep deprivation impairs the prefrontal cortex-dependent regulation of the amygdala. *Journal of Neuroscience*, *33*(28), 11639-11646.** (Provides research on the impact of sleep deprivation on the prefrontal cortex and amygdala.)
82
-
83
- * **Mander, B. A., Winer, J. R., Jagust, W. J., & Walker, M. P. (2016). Sleep disturbance and the aging brain. *Trends in Neurosciences*, *39*(1), 33-43.** (Provides research on the impact of sleep on the aging brain.)
84
-
85
- **(Note: Replace the bracketed placeholders with the actual website links.)**
86
  ",
87
- "media": {
88
- "images": [
89
- "https://www.nhlbi.nih.gov/themes/custom/nhlbi/images/ico-youtube-white.svg",
90
- "data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7",
91
- "https://www.nhlbi.nih.gov/themes/custom/nhlbi/images/NHLBI_Meta_Image.png",
92
- "https://www.nhlbi.nih.gov/themes/custom/nhlbi/images/x-twitter-logo.svg",
93
- "https://www.nhlbi.nih.gov/themes/custom/nhlbi/images/linkedin-brands.svg",
94
- "https://www.nhlbi.nih.gov/themes/custom/nhlbi/images/share-alt-icon.svg",
95
- "https://www.nhlbi.nih.gov/themes/custom/nhlbi/images/ico-facebook-white.svg",
96
- "https://www.nhlbi.nih.gov/modules/custom/nhlbi_blazy/images/clear.png",
97
- "https://www.sleepfoundation.org/wp-content/uploads/2022/07/sf-instagram.svg",
98
- "https://www.sleepfoundation.org/wp-content/uploads/2018/10/ExcessiveSleepiness_1440x360.jpg",
99
- "https://www.sleepfoundation.org/wp-content/uploads/2021/06/Hypnopompic-Hallucinations-300x169.jpg",
100
- "https://www.sleepfoundation.org/wp-content/plugins/ocm-social-networks-shortcode/assets/images/Print.svg",
101
- "https://www.sleepfoundation.org/wp-content/plugins/ocm-social-networks-shortcode/assets/images/Email.svg",
102
- "https://www.sleepfoundation.org/wp-content/uploads/2020/12/Smile-Brilliant-1-300x214.jpg",
103
- "https://www.sleepfoundation.org/wp-content/uploads/2021/02/shutterstock_1432698452-300x200.jpg",
104
- "https://www.sleepfoundation.org/wp-content/uploads/2009/12/WhatHappensWhenYouSlee.jpg",
105
- "https://www.sleepfoundation.org/wp-content/uploads/2023/11/All-Nighters-Helpful-or-Harmful-300x200.jpg",
106
- "https://www.sleepfoundation.org/wp-content/uploads/2023/02/sf-linkedin.svg",
107
- "https://www.sleepfoundation.org/wp-content/uploads/2021/08/Sleep-Spindles-300x198.jpg",
108
- "https://www.sleepfoundation.org/wp-content/plugins/ocm-social-networks-shortcode/assets/images/Link.svg",
109
- "https://www.sleepfoundation.org/wp-content/uploads/2023/11/lucy_bryan-lg-profile-150x150.jpg",
110
- "https://www.sleepfoundation.org/wp-content/uploads/2021/09/How-to-Become-a-Morning-Person-300x200.jpg",
111
- "https://www.sleepfoundation.org/wp-content/uploads/2018/07/What-Causes-Restless-Sleep-300x207.jpg",
112
- "https://www.sleepfoundation.org/wp-content/uploads/2023/10/does-oxygen-drop-while-sleeping-300x158.jpg",
113
- "https://www.sleepfoundation.org/wp-content/uploads/2022/04/shutterstock_1413707132-300x192.jpg",
114
- "https://www.sleepfoundation.org/wp-content/uploads/2022/04/Eight-Health-Benefits-of-Sleep-300x200.jpg",
115
- "https://www.sleepfoundation.org/wp-content/uploads/2018/08/NSF-6-J_SleepFoundation_How-Sleep-Impacts-Your-Energy-Level-Throughout-the-Day_Purchased_900x560-300x125.jpeg",
116
- "https://www.sleepfoundation.org/wp-content/uploads/2023/02/sf-tiktok.svg",
117
- "https://www.sleepfoundation.org/wp-content/uploads/2021/08/REM-Rebound-300x200.jpg",
118
- "https://www.sleepfoundation.org/wp-content/uploads/2021/01/shutterstock_1033291948-300x200.jpg",
119
- "https://www.sleepfoundation.org/wp-content/uploads/2017/07/NSF-8-A-464028.jpg",
120
- "https://www.sleepfoundation.org/wp-content/uploads/2018/10/men-women-sleep-300x234.jpg",
121
- "https://www.sleepfoundation.org/wp-content/uploads/2023/09/group-of-people-drinking-coffee-300x200.jpg",
122
- "https://www.sleepfoundation.org/wp-content/uploads/2020/09/circadian-300x200.jpg",
123
- "https://www.sleepfoundation.org/wp-content/uploads/2024/05/Can-You-Learn-a-Language-While-Sleeping--300x200.jpg",
124
- "https://www.sleepfoundation.org/wp-content/uploads/2023/02/sf-facebook.svg",
125
- "https://www.sleepfoundation.org/wp-content/uploads/2021/05/woman-with-melatonin-300x200.jpg",
126
- "https://www.sleepfoundation.org/wp-content/uploads/2022/07/social_youtube.svg",
127
- "https://www.sleepfoundation.org/wp-content/uploads/2021/06/oversleeping-300x200.jpg",
128
- "https://www.sleepfoundation.org/wp-content/uploads/2021/08/Biphasic-Sleep-What-It-Is-And-How-It-Works-300x200.jpg",
129
- "https://www.sleepfoundation.org/wp-content/uploads/2022/06/adenosine-300x200.jpg",
130
- "https://www.sleepfoundation.org/wp-content/uploads/2014/06/26-NSF-9-I_neurons_2880x1400.jpg",
131
- "https://www.sleepfoundation.org/wp-content/uploads/2019/09/HowMuchSleepDoWeReallyNeed.jpg",
132
- "https://www.sleepfoundation.org/wp-content/uploads/2022/06/alpha-waves2-300x200.jpg",
133
- "https://www.sleepfoundation.org/wp-content/plugins/ocm-social-networks-shortcode/assets/images/Facebook.svg",
134
- "https://www.sleepfoundation.org/wp-content/uploads/2017/04/What’s-the-Connection-Between-Race-and-Sleep-Disorders-300x200.jpg",
135
- "https://www.sleepfoundation.org/wp-content/uploads/2020/02/mattress-sizes-300x225.jpg",
136
- "https://www.sleepfoundation.org/wp-content/uploads/2010/01/Sleep-Debt-and-Catching-Up-on-Sleep-300x200.jpg",
137
- "https://www.sleepfoundation.org/wp-content/uploads/2009/04/SleepDriveandYourBodyClock.jpg",
138
- "https://www.sleepfoundation.org/wp-content/uploads/2020/04/best_clips.00_09_45_18.still029_720-300x169.jpg",
139
- "https://www.sleepfoundation.org/wp-content/uploads/2022/04/Why-Do-I-Wake-Up-at-3am-300x200.jpg",
140
- "https://www.sleepfoundation.org/wp-content/uploads/2019/08/NSF_10_B_Sleepfoundation_HowYourBodyUsesCaloriesWhileSleeping_Purchased_1440x600.jpg",
141
- "https://www.sleepfoundation.org/wp-content/uploads/2022/03/Long-Sleepers-300x181.jpg",
142
- "https://www.sleepfoundation.org/wp-content/uploads/2009/05/shutterstock_1922139554-300x208.jpg",
143
- "https://www.sleepfoundation.org/wp-content/uploads/2024/02/brandon-peters-profile-150x150.png",
144
- "https://www.sleepfoundation.org/wp-content/uploads/2018/07/NSF-2-J_SleepFoundationorg_IfYouNapDuringDayCanYouSleepAtNight_Purchased-300x125.webp",
145
- "https://www.sleepfoundation.org/wp-content/uploads/2018/07/Daytime-Tiredness-300x200.jpg",
146
- "https://www.sleepfoundation.org/wp-content/uploads/2020/11/shutterstock_1455947342-300x200.jpg",
147
- "https://www.sleepfoundation.org/wp-content/uploads/2021/08/Microsleep-What-Is-It-What-Causes-It-and-Is-It-Safe-300x200.jpg",
148
- "https://www.sleepfoundation.org/wp-content/plugins/ocm-social-networks-shortcode/assets/images/Twitter.svg",
149
- "https://www.sleepfoundation.org/wp-content/uploads/2010/04/shutterstock_623157431-300x200.jpg",
150
- "https://www.sleepfoundation.org/wp-content/uploads/2024/04/Mattresses-in-the-Sleep-Foundation-Test-Lab-3-300x125.jpg",
151
- "https://www.sleepfoundation.org/wp-content/uploads/2023/02/person-stretch.png",
152
- "https://www.sleepfoundation.org/wp-content/uploads/2021/02/shutterstock_1397203685-300x200.jpg",
153
- "https://www.sleepfoundation.org/wp-content/uploads/2021/03/how-sleep-works-featured-sized-300x200.jpg",
154
- "https://www.sleepfoundation.org/wp-content/themes/onecaremedia-child/assets/images/sf-question-bubble.svg",
155
- "https://www.sleepfoundation.org/wp-content/uploads/2021/06/Hypnagogic-Hallucinations-300x200.jpg",
156
- "https://www.sleepfoundation.org/wp-content/uploads/2023/08/SF_Icon.svg",
157
- "https://www.sleepfoundation.org/wp-content/uploads/2021/05/Do-Moon-Phases-Affect-Your-Sleep-300x200.jpg",
158
- "https://www.sleepfoundation.org/wp-content/uploads/2022/04/How-to-Wake-Up-Easier-300x214.jpg",
159
- "https://www.sleepfoundation.org/wp-content/uploads/2021/08/Polyphasic-Sleep-Schedule-300x200.jpg",
160
- "https://www.sleepfoundation.org/wp-content/uploads/2021/08/Light-Sleeper-What-It-Means-and-What-To-Do-About-It--300x199.jpg",
161
- "https://www.sleepfoundation.org/wp-content/uploads/2020/09/Saatva-Lightweight-Down-Alternative-Comforter-253x140-1.jpeg",
162
- "https://www.sleepfoundation.org/wp-content/uploads/2023/07/Sweet-Zzz-White-Noise-Machine-300x197.jpg",
163
- "https://www.sleepfoundation.org/wp-content/uploads/2022/07/sf-twitter.svg",
164
- "https://www.sleepfoundation.org/wp-content/uploads/2021/08/Bedroom-Environment-1-300x125.jpg",
165
- "https://www.sleepfoundation.org/wp-content/uploads/2014/06/Why-Do-We-Need-Sleep.jpg",
166
- "https://www.sleepfoundation.org/wp-content/uploads/2021/08/Sleep-Latency-300x200.jpg",
167
- "https://www.sleepfoundation.org/wp-content/uploads/2021/02/shutterstock_1165389808-300x200.jpg",
168
- "https://www.sleepfoundation.org/wp-content/uploads/2013/12/32-NSF-11-G_bicycling_2880x1400.jpg",
169
- "https://pmc.ncbi.nlm.nih.gov/static/img/icon-https.svg",
170
- "https://pmc.ncbi.nlm.nih.gov/static/img/us_flag.svg",
171
- "https://cdn.ncbi.nlm.nih.gov/pmc/banners/logo-neurosciences.gif",
172
- "https://cdn.ncbi.nlm.nih.gov/pmc/blobs/5f19/10155483/aafb17b10f01/Neurosciences-28-2-91_page_3_2.jpg",
173
- "https://cdn.ncbi.nlm.nih.gov/pmc/blobs/5f19/10155483/4689be56a1f1/Neurosciences-28-2-91_page_3_3.jpg",
174
- "https://cdn.ncbi.nlm.nih.gov/pmc/blobs/5f19/10155483/d6f8b2352002/Neurosciences-28-2-91_page_3_5.jpg",
175
- "data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIGhlaWdodD0iMjQiIHZpZXdCb3g9IjAgMCAyNCAyNCIgd2lkdGg9IjI0Ij48cGF0aCBkPSJNMCAwaDI0djI0SDB6IiBmaWxsPSJub25lIi8+PHBhdGggZmlsbD0iI2ZmZiIgZD0iTTE1LjUgMTRoLS43OWwtLjI4LS4yN0E2LjQ3MSA2LjQ3MSAwIDAgMCAxNiA5LjUgNi41IDYuNSAwIDEgMCA5LjUgMTZjMS42MSAwIDMuMDktLjU5IDQuMjMtMS41N2wuMjcuMjh2Ljc5bDUgNC45OUwyMC40OSAxOWwtNC45OS01em0tNiAwQzcuMDEgMTQgNSAxMS45OSA1IDkuNVM3LjAxIDUgOS41IDUgMTQgNy4wMSAxNCA5LjUgMTEuOTkgMTQgOS41IDE0eiIvPjwvc3ZnPg==",
176
- "https://cdn.ncbi.nlm.nih.gov/pmc/cms/images/pmc-card-share.jpg?_=0",
177
- "https://pmc.ncbi.nlm.nih.gov/static/img/icon-dot-gov.svg",
178
- "https://cdn.ncbi.nlm.nih.gov/pmc/blobs/5f19/10155483/940f09bfd840/Neurosciences-28-2-91_page_3_4.jpg",
179
- "https://pmc.ncbi.nlm.nih.gov/static/img/ncbi-logos/nih-nlm-ncbi--white.svg",
180
- "https://pmc.ncbi.nlm.nih.gov/static/img/usa-icons-bg/search--white.svg",
181
- "https://pmc.ncbi.nlm.nih.gov/static/img/usa-icons/close.svg",
182
- "https://cdn.ncbi.nlm.nih.gov/pmc/blobs/5f19/10155483/b09ec7af5f3e/Neurosciences-28-2-91_page_3_1.jpg",
183
- "https://pmc.ncbi.nlm.nih.gov/static/img/icon-https.svg",
184
- "https://pmc.ncbi.nlm.nih.gov/static/img/us_flag.svg",
185
- "https://cdn.ncbi.nlm.nih.gov/pmc/blobs/346b/5835037/685e9cdaf82f/fncir-12-00014-g0001.jpg",
186
- "https://cdn.ncbi.nlm.nih.gov/pmc/cms/images/pmc-card-share.jpg?_=0",
187
- "data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIGhlaWdodD0iMjQiIHZpZXdCb3g9IjAgMCAyNCAyNCIgd2lkdGg9IjI0Ij48cGF0aCBkPSJNMCAwaDI0djI0SDB6IiBmaWxsPSJub25lIi8+PHBhdGggZmlsbD0iI2ZmZiIgZD0iTTE1LjUgMTRoLS43OWwtLjI4LS4yN0E2LjQ3MSA2LjQ3MSAwIDAgMCAxNiA5LjUgNi41IDYuNSAwIDEgMCA5LjUgMTZjMS42MSAwIDMuMDktLjU5IDQuMjMtMS41N2wuMjcuMjh2Ljc5bDUgNC45OUwyMC40OSAxOWwtNC45OS01em0tNiAwQzcuMDEgMTQgNSAxMS45OSA1IDkuNVM3LjAxIDUgOS41IDUgMTQgNy4wMSAxNCA5LjUgMTEuOTkgMTQgOS41IDE0eiIvPjwvc3ZnPg==",
188
- "https://pmc.ncbi.nlm.nih.gov/static/img/icon-dot-gov.svg",
189
- "https://pmc.ncbi.nlm.nih.gov/static/img/ncbi-logos/nih-nlm-ncbi--white.svg",
190
- "https://cdn.ncbi.nlm.nih.gov/pmc/banners/logo-frontneurcirc.gif",
191
- "https://pmc.ncbi.nlm.nih.gov/static/img/usa-icons-bg/search--white.svg",
192
- "https://cdn.ncbi.nlm.nih.gov/pmc/blobs/346b/5835037/a26e7bc8e133/fncir-12-00014-g0002.jpg",
193
- "https://pmc.ncbi.nlm.nih.gov/static/img/usa-icons/close.svg",
194
- "https://pmc.ncbi.nlm.nih.gov/static/img/icon-https.svg",
195
- "https://pmc.ncbi.nlm.nih.gov/static/img/us_flag.svg",
196
- "https://cdn.ncbi.nlm.nih.gov/pmc/cms/images/pmc-card-share.jpg?_=0",
197
- "data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIGhlaWdodD0iMjQiIHZpZXdCb3g9IjAgMCAyNCAyNCIgd2lkdGg9IjI0Ij48cGF0aCBkPSJNMCAwaDI0djI0SDB6IiBmaWxsPSJub25lIi8+PHBhdGggZmlsbD0iI2ZmZiIgZD0iTTE1LjUgMTRoLS43OWwtLjI4LS4yN0E2LjQ3MSA2LjQ3MSAwIDAgMCAxNiA5LjUgNi41IDYuNSAwIDEgMCA5LjUgMTZjMS42MSAwIDMuMDktLjU5IDQuMjMtMS41N2wuMjcuMjh2Ljc5bDUgNC45OUwyMC40OSAxOWwtNC45OS01em0tNiAwQzcuMDEgMTQgNSAxMS45OSA1IDkuNVM3LjAxIDUgOS41IDUgMTQgNy4wMSAxNCA5LjUgMTEuOTkgMTQgOS41IDE0eiIvPjwvc3ZnPg==",
198
- "https://pmc.ncbi.nlm.nih.gov/static/img/icon-dot-gov.svg",
199
- "https://cdn.ncbi.nlm.nih.gov/pmc/blobs/47a0/6361301/6ec30be87d57/ssci-11-04-0217-g01.jpg",
200
- "https://pmc.ncbi.nlm.nih.gov/static/img/ncbi-logos/nih-nlm-ncbi--white.svg",
201
- "https://cdn.ncbi.nlm.nih.gov/pmc/banners/logo-ssci.png",
202
- "https://pmc.ncbi.nlm.nih.gov/static/img/usa-icons-bg/search--white.svg",
203
- "https://cdn.ncbi.nlm.nih.gov/pmc/blobs/47a0/6361301/b926e25b27d8/ssci-11-04-0217-g02.jpg",
204
- "https://pmc.ncbi.nlm.nih.gov/static/img/usa-icons/close.svg",
205
- "https://cdn.ncbi.nlm.nih.gov/pubmed/persistent/pubmed-meta-image-v2.jpg",
206
- "https://cdn.ncbi.nlm.nih.gov/corehtml/query/egifs/https:--linkinghub.elsevier.com-ihub-images-PubMedLink.gif",
207
- "https://www.ncbi.nlm.nih.gov/coreutils/nwds/img/logos/AgencyLogo.svg",
208
- "https://www.ncbi.nlm.nih.gov/coreutils/uswds/img/icon-dot-gov.svg",
209
- "https://www.ncbi.nlm.nih.gov/coreutils/uswds/img/favicons/favicon-57.png",
210
- "https://www.ncbi.nlm.nih.gov/coreutils/uswds/img/icon-https.svg",
211
- "https://cdn.ncbi.nlm.nih.gov/pubmed/14efe001-aebf-4097-9883-28c967333423/core/images/pubmed-logo-blue.svg",
212
- "https://pmc.ncbi.nlm.nih.gov/static/img/icon-https.svg",
213
- "https://pmc.ncbi.nlm.nih.gov/static/img/us_flag.svg",
214
- "https://cdn.ncbi.nlm.nih.gov/pmc/cms/images/pmc-card-share.jpg?_=0",
215
- "data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIGhlaWdodD0iMjQiIHZpZXdCb3g9IjAgMCAyNCAyNCIgd2lkdGg9IjI0Ij48cGF0aCBkPSJNMCAwaDI0djI0SDB6IiBmaWxsPSJub25lIi8+PHBhdGggZmlsbD0iI2ZmZiIgZD0iTTE1LjUgMTRoLS43OWwtLjI4LS4yN0E2LjQ3MSA2LjQ3MSAwIDAgMCAxNiA5LjUgNi41IDYuNSAwIDEgMCA5LjUgMTZjMS42MSAwIDMuMDktLjU5IDQuMjMtMS41N2wuMjcuMjh2Ljc5bDUgNC45OUwyMC40OSAxOWwtNC45OS01em0tNiAwQzcuMDEgMTQgNSAxMS45OSA1IDkuNVM3LjAxIDUgOS41IDUgMTQgNy4wMSAxNCA5LjUgMTEuOTkgMTQgOS41IDE0eiIvPjwvc3ZnPg==",
216
- "https://pmc.ncbi.nlm.nih.gov/static/img/icon-dot-gov.svg",
217
- "https://cdn.ncbi.nlm.nih.gov/pmc/banners/logo-nihpa.png",
218
- "https://pmc.ncbi.nlm.nih.gov/static/img/ncbi-logos/nih-nlm-ncbi--white.svg",
219
- "https://pmc.ncbi.nlm.nih.gov/static/img/usa-icons-bg/search--white.svg",
220
- "https://pmc.ncbi.nlm.nih.gov/static/img/usa-icons/close.svg"
221
- ],
222
- "videos": [],
223
- "links": [
224
- {
225
- "url": "https://www.nhlbi.nih.gov/health/sleep/why-sleep-important",
226
- "title": "How Sleep Works - Why Is Sleep Important?",
227
- "summary": ""
228
- },
229
- {
230
- "url": "https://www.sleepfoundation.org/how-sleep-works/why-do-we-need-sleep",
231
- "title": "Why Do We Need Sleep?",
232
- "summary": ""
233
- },
234
- {
235
- "url": "https://pmc.ncbi.nlm.nih.gov/articles/PMC10155483/",
236
- "title": "The consequences of sleep deprivation on cognitive performance",
237
- "summary": ""
238
- },
239
- {
240
- "url": "https://pmc.ncbi.nlm.nih.gov/articles/PMC5835037/",
241
- "title": "Sleep Deprivation and the Epigenome",
242
- "summary": ""
243
- },
244
- {
245
- "url": "https://pmc.ncbi.nlm.nih.gov/articles/PMC6361301/",
246
- "title": "Sleep duration, lifestyles and chronic diseases: a cross-sectional population-based study",
247
- "summary": ""
248
- },
249
- {
250
- "url": "https://pubmed.ncbi.nlm.nih.gov/32858334/",
251
- "title": "The association between sleep duration and chronic diseases: a population-based cross-sectional study",
252
- "summary": ""
253
- },
254
- {
255
- "url": "https://pmc.ncbi.nlm.nih.gov/articles/PMC5567876/",
256
- "title": "Sleep Duration and Quality: Impact on Lifestyle Behaviors and Cardiometabolic Health: A Scientific Statement From the American Heart Association",
257
- "summary": ""
258
- }
259
- ],
260
- "references": []
261
- },
262
- "research_tree": {
263
- "query": "importance of sleep in our life",
264
- "importance": 0,
265
- "depth": 0,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
266
  "children": [
267
- {
268
- "query": "Investigate the specific molecular mechanisms by which sleep deprivation impacts cognitive function, focusing on neurotransmitter systems and synaptic plasticity.",
269
- "importance": 0.9,
270
- "depth": 1,
271
- "children": [
272
- {
273
- "query": "Investigate the specific epigenetic modifications induced by sleep deprivation in different brain regions and their correlation with changes in neurotransmitter receptor expression and synaptic plasticity markers.",
274
- "importance": 0.9,
275
- "depth": 2,
276
- "children": []
277
- },
278
- {
279
- "query": "Explore the role of specific neurotransmitter systems (e.g., dopamine, acetylcholine, glutamate) in mediating the cognitive deficits observed after sleep deprivation, focusing on the impact on synaptic transmission and plasticity.",
280
- "importance": 0.8,
281
- "depth": 2,
282
- "children": []
283
- },
284
- {
285
- "query": "Determine the reversibility of sleep deprivation-induced cognitive impairments and the underlying molecular mechanisms, including the potential for interventions targeting epigenetic modifications or neurotransmitter imbalances to restore cognitive function.",
286
- "importance": 0.7,
287
- "depth": 2,
288
- "children": []
289
- }
290
- ]
291
- },
292
- {
293
- "query": "Explore the relationship between sleep quality, duration, and the development of chronic diseases (e.g., cardiovascular disease, diabetes) across different age groups and demographics.",
294
- "importance": 0.8,
295
- "depth": 1,
296
- "children": [
297
- {
298
- "query": "Investigate the mediating role of specific lifestyle behaviors (e.g., diet, physical activity, substance use) in the relationship between sleep duration/quality and the development of cardiovascular disease, diabetes, and obesity across different age groups and demographics. This should include longitudinal studies to establish causality.",
299
- "importance": 0.9,
300
- "depth": 2,
301
- "children": []
302
- },
303
- {
304
- "query": "Conduct a comparative analysis of the impact of different sleep disorders (e.g., insomnia, sleep apnea, restless legs syndrome) on the risk of developing chronic diseases, considering age, gender, and socioeconomic status. Explore potential mechanisms linking specific sleep disorders to disease development.",
305
- "importance": 0.85,
306
- "depth": 2,
307
- "children": []
308
- },
309
- {
310
- "query": "Examine the bidirectional relationship between sleep and chronic diseases. Specifically, how do chronic diseases impact sleep quality and duration, and how do changes in sleep patterns influence the progression and management of these diseases? This should include studies on the impact of disease treatments on sleep.",
311
- "importance": 0.75,
312
- "depth": 2,
313
- "children": []
314
- }
315
- ]
316
- },
317
- {
318
- "query": "Conduct a longitudinal study to assess the impact of consistent sleep schedules and interventions promoting healthy sleep habits on mental health outcomes, including mood regulation and resilience to stress.",
319
- "importance": 0.7,
320
- "depth": 1,
321
- "children": []
322
- }
323
  ]
324
- },
325
- "metadata": {
326
- "total_sources": 9,
327
- "max_depth_reached": 2,
328
- "total_tokens": 4706
329
- }
 
 
 
330
  }
 
1
  {
2
+ "topic": "what are the different types of deep learning models",
3
+ "timestamp": "2025-03-07T18:04:44.789202",
4
+ "content": "Here's a comprehensive report on the different types of deep learning models, based on the provided research findings:
5
 
6
+ ## Deep Dive into Deep Learning Models
 
 
7
 
8
+ Deep learning, a subset of machine learning, utilizes artificial neural networks to learn from data. These networks, inspired by the human brain, are composed of interconnected layers of nodes (neurons) that process and transform data. This report provides an overview of deep learning, its applications, benefits, challenges, and, most importantly, the various types of deep learning models.
9
 
10
+ ### 1. What is Deep Learning?
11
 
12
+ Deep learning is a type of machine learning that employs artificial neural networks to learn from data. These networks are composed of multiple layers of interconnected nodes, each responsible for learning specific features of the data. The process involves training deep learning algorithms on large datasets of labeled data, allowing them to associate features with the correct labels. Once trained, these algorithms can make predictions on new, unseen data.
13
 
14
+ ### 2. Deep Learning Applications
15
 
16
+ Deep learning has found applications in a wide array of fields, including:
17
 
18
+ * **Image Recognition:** Identifying objects and features in images.
19
+ * **Natural Language Processing (NLP):** Understanding the meaning of text, enabling applications like chatbots and spam filters.
20
+ * **Finance:** Analyzing financial data and predicting market trends.
21
+ * **Text to Image:** Converting text into images.
22
 
23
+ ### 3. Benefits of Deep Learning Models
24
 
25
+ Deep learning models offer several advantages over traditional machine learning methods:
26
 
27
+ * **Learning Complex Relationships:** They can learn intricate relationships between features in data, leading to more powerful and accurate models.
28
+ * **Scalability:** They can be trained on large datasets, allowing them to learn from a wider range of experiences and make more accurate predictions.
29
+ * **Data-Driven Learning:** They require less human intervention, increasing efficiency and scalability.
30
 
31
+ ### 4. Challenges of Deep Learning Models
32
 
33
+ Despite their benefits, deep learning models also face several challenges:
34
 
35
+ * **Data Requirements:** They require large amounts of data to learn effectively, which can be a limitation in domains with limited data availability.
36
+ * **Overfitting:** They can overfit the training data, learning the noise rather than the underlying relationships.
37
+ * **Bias:** They can be biased based on the data they are trained on, leading to unfair or inaccurate predictions.
38
 
39
+ ### 5. Types of Deep Learning Models
40
 
41
+ The research findings highlight three common types of deep learning models:
42
 
43
+ * **Convolutional Neural Networks (CNNs):** CNNs are specifically designed for image recognition and processing tasks. They excel at identifying objects in images, even when the objects are partially obscured or distorted.
44
+ * **Recurrent Neural Networks (RNNs):** RNNs are well-suited for natural language processing and speech recognition. They are particularly effective at understanding the context of a sentence or phrase and can be used for tasks like text generation and language translation.
45
+ * **Deep Belief Networks (DBNs):** DBNs are generative models composed of multiple layers of stochastic, latent variables. They are used for feature extraction and dimensionality reduction.
46
 
47
+ ### 6. Other Deep Learning Architectures (From External Source)
48
 
49
+ The provided external source mentions additional deep learning architectures, including:
50
 
51
+ * **Long Short-Term Memory Networks (LSTMs):** A type of RNN designed to handle long-term dependencies in sequential data, making them suitable for tasks like speech recognition and time series prediction.
52
+ * **Gated Recurrent Unit (GRU):** A variant of LSTM with fewer parameters, making them suitable for smaller datasets.
53
+ * **Generative Adversarial Networks (GANs):** Used for generating realistic data, such as images, videos, and audio, by training two neural networks in a competitive setting.
54
+ * **Transformer Networks:** A powerful architecture that uses self-attention mechanisms to understand context and relationships in sequential data, making them suitable for NLP tasks like translation and text generation.
55
+ * **Autoencoders:** Used for dimensionality reduction, anomaly detection, and feature learning.
56
+ * **Deep Stacking Networks (DSNs):** A set of individual deep networks, each with its own hidden layers, designed to improve training and handle complex classification tasks.
57
 
58
+ ### 7. Semi-Supervised Learning
59
 
60
+ Semi-supervised learning is a machine learning approach that combines labeled and unlabeled data for training. This is particularly useful when labeled data is scarce and expensive to obtain, while unlabeled data is abundant. The goal is to leverage the information in both types of data to improve model performance.
61
 
62
+ ### 8. Reinforcement Learning
63
 
64
+ Reinforcement learning is a machine learning paradigm where an agent learns to make decisions in an environment to maximize a reward. The agent interacts with the environment, receives feedback (rewards or penalties), and adjusts its actions to achieve its goals.
65
 
66
+ ### 9. Reinforcement Learning from Human Feedback (RLHF)
67
 
68
+ RLHF is a technique used to improve large language models (LLMs) by incorporating human preferences into the training process. This involves training a reward model based on human feedback and then using this reward model to fine-tune the LLM using reinforcement learning. This approach has been used in models like ChatGPT and InstructGPT to generate more natural and human-like responses.
69
 
70
+ ### 10. Conclusion
71
 
72
+ Deep learning models are a powerful tool for solving complex problems across various domains. The choice of the right model depends on the specific task, the nature of the data, and the desired outcome. As the field of deep learning continues to evolve, new architectures and techniques will emerge, further expanding the capabilities of these models.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  ",
74
+ "media": {
75
+ "images": [
76
+ "https://deepsense.ai/wp-content/uploads/2024/11/Figure-1-Example-usage-of-ChatGPT-to-analyze-worst-case-time-complexity-of-bubble-sorting-in-the-specified-style.jpeg",
77
+ "https://addepto.com/wp-content/uploads/2023/02/featured-images_blog-16.jpg",
78
+ "https://addepto.com/wp-content/uploads/2021/08/at_is_entropy_in_machine_learning_1_.webp",
79
+ "https://deepsense.ai/wp-content/smush-webp/2025/03/Miniatura-Unstructured-1024x614.png.webp",
80
+ "https://deepsense.ai/wp-content/uploads/2024/11/Figure-3-Experiment-video-screenshot.png",
81
+ "https://addepto.com/wp-content/uploads/2020/07/Deep_Learning_Architecture_1_.webp",
82
+ "https://addepto.com/wp-content/uploads/2020/02/ing_-_5_Examples_Of_How_To_Use_It_1_.webp",
83
+ "https://i.ytimg.com/vi/olFxW7kdtP8/hqdefault.jpg",
84
+ "https://images.prismic.io/turing/652ec261fbd9a45bcec81941_Reinforcement_Learning_4_11zon_946380769c.webp?auto=format%2Ccompress&fit=max&w=3840",
85
+ "https://www.sas.com/en_ie/insights/articles/analytics/machine-learning-algorithms/_jcr_content/par02/image_8597.img.jpg/1494256305219.jpg",
86
+ "https://www.simplilearn.com/ice9/free_resources_article_thumb/ML-banner_1.jpg",
87
+ "https://www.simplilearn.com/ice9/free_resources_article_thumb/how_to_become_machine_learning_engineer.jpg",
88
+ "https://deepsense.ai/wp-content/uploads/2024/10/6-a-2.png",
89
+ "https://www.simplilearn.com/ice9/free_resources_article_thumb/AI_vs_Machine_Learning_vs_Deep_Learning.jpg",
90
+ "https://www.simplilearn.com/ice9/ebooks/ML_careerguide.jpg",
91
+ "https://www.simplilearn.com/ice9/ebooks/ML_eBook_FRS.jpg",
92
+ "https://www.datarobot.com/wp-content/uploads/2022/02/image3-2-1024x390.jpeg",
93
+ "https://images.prismic.io/turing/65980f9f531ac2845a2728df_reinforcement_learning_example_82cc17b798.webp?auto=format,compress",
94
+ "https://deepsense.ai/wp-content/uploads/2024/11/Figure-8-Fine-tuning-with-Reinforcement-Learning.png",
95
+ "https://www.datarobot.com/wp-content/uploads/2022/02/image5-1-1-1024x892.jpeg",
96
+ "https://images.prismic.io/turing/65980f9e531ac2845a2728de_reinforcement_learning_algorithm_2a63561d9a.webp?auto=format,compress",
97
+ "https://deepsense.ai/wp-content/uploads/2024/11/Figure-7-Reward-model-training.png",
98
+ "https://www.simplilearn.com/ice9/free_resources_article_thumb/Deep_learning_algorithm.jpg",
99
+ "https://media.geeksforgeeks.org/img-practice/prod/courses/554/Mobile/Other/data_science_1720847526.webp",
100
+ "https://media.geeksforgeeks.org/img-practice/prod/courses/405/Mobile/Other/Course_ML_&_DS_1720846555.webp",
101
+ "https://deepsense.ai/wp-content/smush-webp/2025/03/scott-rodgerson-PSpf_XgOM5w-unsplash-1024x683.jpg.webp",
102
+ "https://deepsense.ai/wp-content/smush-webp/2025/03/partnerzy_blog-1024x614.png.webp",
103
+ "https://media.geeksforgeeks.org/img-practice/prod/courses/808/Web/Content/ai-for-kids-webp_1728467027.webp",
104
+ "https://addepto.com/wp-content/uploads/2022/09/featured-images_blog-4.webp",
105
+ "https://ars.els-cdn.com/content/image/3-s2.0-B9781782421795500055-f05-65-9781782421795.jpg",
106
+ "https://www.simplilearn.com/ice9/free_resources_article_thumb/ArtificalNeuralNetwork.PNG",
107
+ "https://deepsense.ai/wp-content/uploads/2024/11/Figure-6-Language-model-pretraining.png",
108
+ "https://media.geeksforgeeks.org/wp-content/uploads/20220805171912/ProposedSemisupervisedLearningProcess.jpg",
109
+ "https://images.prismic.io/turing/65980fa0531ac2845a2728e0_reinforcement_learning_techniques_0d7f3f5e9c.webp?auto=format,compress",
110
+ "https://deepsense.ai/wp-content/uploads/2024/11/Figure-5-ChatGPT-fine-tuning-steps.png",
111
+ "https://addepto.com/wp-content/uploads/2024/12/contextcheck-2.jpg",
112
+ "https://deepsense.ai/wp-content/uploads/2024/11/Figure-4-Reinforcement-learning-from-human-feedback-training-loop.png",
113
+ "https://addepto.com/wp-content/uploads/2023/03/e-learning-consulting-featured-image.webp",
114
+ "https://deepsense.ai/wp-content/uploads/2024/10/6-a-3.png",
115
+ "https://deepsense.ai/wp-content/smush-webp/2024/11/How-can-we-improve-language-models-using-reinforcement-learning-ChatGPT-case-study-1024x303.jpeg.webp",
116
+ "https://deepsense.ai/wp-content/uploads/2024/11/Figure-2-Classic-reinforcement-learning-training-loop.png"
117
+ ],
118
+ "videos": [
119
+ "https://www.youtube.com/watch?v=oC7Cw3fu3gU",
120
+ "https://www.youtube.com/@deepsenseai",
121
+ "https://www.youtube.com/googlecloud",
122
+ "https://www.youtube.com/user/Simplilearn",
123
+ "https://www.youtube.com/c/sasusers",
124
+ "https://www.youtube.com/geeksforgeeksvideos",
125
+ "https://www.youtube.com/channel/UCLfxdtB3H0JdnniBnd4DoNA",
126
+ "https://www.youtube.com/SASsoftware",
127
+ "https://www.youtube.com/googlecloudplatform"
128
+ ],
129
+ "links": [
130
+ {
131
+ "text": "Learn more about this provider",
132
+ "url": "https://usercentrics.com/privacy-policy/"
133
+ },
134
+ {
135
+ "text": "Contact and support",
136
+ "url": "https://service.elsevier.com/app/contact/supporthub/sciencedirect/"
137
+ },
138
+ {
139
+ "text": "Cookiebot",
140
+ "url": "https://www.cookiebot.com"
141
+ },
142
+ {
143
+ "text": "Learn more about this provider",
144
+ "url": "https://privacy.microsoft.com/en-US/privacystatement"
145
+ },
146
+ {
147
+ "text": "Email",
148
+ "url": "mailto:subject=A%20guide%20to%20the%20types%20of%20machine%20learning%20algorithms%20%7C%20SAS%20Ireland&body=https%3A%2F%2Fwww.sas.com%2Fen_ie%2Finsights%2Farticles%2Fanalytics%2Fmachine-learning-algorithms.html"
149
+ },
150
+ {
151
+ "text": "",
152
+ "url": "https://www.facebook.com/simplilearn"
153
+ },
154
+ {
155
+ "text": "",
156
+ "url": "https://www.facebook.com/addeptoanalytics/"
157
+ },
158
+ {
159
+ "text": "Open In App",
160
+ "url": "https://geeksforgeeksapp.page.link/?link=https://www.geeksforgeeks.org/ml-semi-supervised-learning/?type%3Darticle%26id%3D308122&apn=free.programming.programming&isi=1641848816&ibi=org.geeksforgeeks.GeeksforGeeksDev&efr=1"
161
+ },
162
+ {
163
+ "text": "YouTube Channel",
164
+ "url": "https://www.youtube.com/c/sasusers"
165
+ },
166
+ {
167
+ "text": "YouTube",
168
+ "url": "https://www.youtube.com/SASsoftware"
169
+ },
170
+ {
171
+ "text": "Twitter",
172
+ "url": "https://twitter.com/intent/tweet?text=A%20guide%20to%20the%20types%20of%20machine%20learning%20algorithms%20%7C%20SAS%20Ireland&url=https%3A%2F%2Fwww.sas.com%2Fen_ie%2Finsights%2Farticles%2Fanalytics%2Fmachine-learning-algorithms.html"
173
+ },
174
+ {
175
+ "text": "",
176
+ "url": "https://www.twitter.com/simplilearn"
177
+ },
178
+ {
179
+ "text": "https://arxiv.org/abs/1706.03741",
180
+ "url": "https://arxiv.org/abs/1706.03741"
181
+ },
182
+ {
183
+ "text": "OpenCueOpen source render manager for visual effects and animation.",
184
+ "url": "https://www.opencue.io/docs/getting-started/"
185
+ },
186
+ {
187
+ "text": "Learn more about this provider",
188
+ "url": "https://www.evergage.com/privacy-policy/"
189
+ },
190
+ {
191
+ "text": "Learn more about this provider",
192
+ "url": "https://legal.hubspot.com/privacy-policy?__hstc=198943664.258746a941bc90ce1f5f381e8fb22ac0.1741350864696.1741350864696.1741350864696.1&__hssc=198943664.1.1741350864696&__hsfp=1951743191"
193
+ },
194
+ {
195
+ "text": "Learn more about this provider",
196
+ "url": "https://www.spotify.com/uk/legal/privacy-policy/"
197
+ },
198
+ {
199
+ "text": "",
200
+ "url": "https://www.instagram.com/geeks_for_geeks/"
201
+ },
202
+ {
203
+ "text": "Learn more about this provider",
204
+ "url": "http://www.sitescout.com/privacy"
205
+ },
206
+ {
207
+ "text": "https://openai.com/research/learning-from-human-preferences",
208
+ "url": "https://openai.com/research/learning-from-human-preferences"
209
+ },
210
+ {
211
+ "text": "Google Cloud Community",
212
+ "url": "https://www.googlecloudcommunity.com"
213
+ },
214
+ {
215
+ "text": "",
216
+ "url": "https://pl.linkedin.com/company/addepto"
217
+ },
218
+ {
219
+ "text": "Learn more about this provider",
220
+ "url": "https://legal.hubspot.com/privacy-policy"
221
+ },
222
+ {
223
+ "text": "",
224
+ "url": "https://twitter.com/geeksforgeeks"
225
+ },
226
+ {
227
+ "text": "Learn more about this provider",
228
+ "url": "https://www.linkedin.com/legal/privacy-policy"
229
+ },
230
+ {
231
+ "text": "AppSheetNo-code development platform to build and extend applications.",
232
+ "url": "https://about.appsheet.com/home/"
233
+ },
234
+ {
235
+ "text": "Check it out on Github!",
236
+ "url": "https://github.com/Addepto/contextcheck?utm_source=website&utm_medium=banner&utm_campaign=contextcheck"
237
+ },
238
+ {
239
+ "text": "",
240
+ "url": "https://twitter.com/addepto"
241
+ },
242
+ {
243
+ "text": "Learn more about this provider",
244
+ "url": "https://www.home.neustar/privacy"
245
+ },
246
+ {
247
+ "text": "",
248
+ "url": "https://www.elsevier.com/"
249
+ },
250
+ {
251
+ "text": "",
252
+ "url": "https://www.linkedin.com/company/simplilearn"
253
+ },
254
+ {
255
+ "text": "Press Corner",
256
+ "url": "https://www.googlecloudpresscorner.com"
257
+ },
258
+ {
259
+ "text": "LinkedIn",
260
+ "url": "https://www.linkedin.com/company/sas"
261
+ },
262
+ {
263
+ "text": "Get the Android App",
264
+ "url": "https://play.google.com/store/apps/details?id=com.mobile.simplilearn"
265
+ },
266
+ {
267
+ "text": "",
268
+ "url": "https://www.instagram.com/turingcom"
269
+ },
270
+ {
271
+ "text": "Learn more about this provider",
272
+ "url": "https://www.techtarget.com/privacy-policy-may25/?utm_source=cmp&utm_medium=banner&utm_campaign=consent&utm_term=privacy"
273
+ },
274
+ {
275
+ "text": "",
276
+ "url": "https://in.linkedin.com/company/geeksforgeeks"
277
+ },
278
+ {
279
+ "text": "https://openai.com/blog/chatgpt/",
280
+ "url": "https://openai.com/blog/chatgpt/"
281
+ },
282
+ {
283
+ "text": "Chrome EnterpriseChromeOS, Chrome Browser, and Chrome devices built for business.",
284
+ "url": "https://chromeenterprise.google"
285
+ },
286
+ {
287
+ "text": "Facebook",
288
+ "url": "https://www.facebook.com/SASsoftware"
289
+ },
290
+ {
291
+ "text": "Learn more about this provider",
292
+ "url": "https://www.adobe.com/privacy.html"
293
+ },
294
+ {
295
+ "text": "Sign In",
296
+ "url": "javascript:handleLogin('en_ie');"
297
+ },
298
+ {
299
+ "text": "",
300
+ "url": "https://www.youtube.com/geeksforgeeksvideos"
301
+ },
302
+ {
303
+ "text": "Learn more about this provider",
304
+ "url": "https://www.amazon.com/gp/help/customer/display.html/ref=footer_privacy?ie=UTF8&nodeId=468496"
305
+ },
306
+ {
307
+ "text": "Privacy policy",
308
+ "url": "https://www.elsevier.com/legal/privacy-policy"
309
+ },
310
+ {
311
+ "text": "hi@addepto.com",
312
+ "url": "mailto:hi@addepto.com"
313
+ },
314
+ {
315
+ "text": "",
316
+ "url": "https://www.facebook.com/turingcom"
317
+ },
318
+ {
319
+ "text": "Learn more about this provider",
320
+ "url": "https://documents.marketo.com/legal/cookies/"
321
+ },
322
+ {
323
+ "text": "Terms and conditions",
324
+ "url": "https://www.elsevier.com/legal/elsevier-website-terms-and-conditions"
325
+ },
326
+ {
327
+ "text": "https://openai.com/blog/instruction-following/",
328
+ "url": "https://openai.com/blog/instruction-following/"
329
+ },
330
+ {
331
+ "text": "",
332
+ "url": "https://t.me/simplilearnupdates"
333
+ },
334
+ {
335
+ "text": "",
336
+ "url": "https://www.cookiebot.com/en/what-is-behind-powered-by-cookiebot/"
337
+ },
338
+ {
339
+ "text": "LinkedIn",
340
+ "url": "https://www.linkedin.com/company/deepsense-ai/"
341
+ },
342
+ {
343
+ "text": "Learn more about this provider",
344
+ "url": "https://www.openx.com/legal/privacy-policy/"
345
+ },
346
+ {
347
+ "text": "",
348
+ "url": "https://web.telegram.org/#/im?p=@simplilearnupdates"
349
+ },
350
+ {
351
+ "text": "ContextClue",
352
+ "url": "https://context-clue.com/"
353
+ },
354
+ {
355
+ "text": "Facebook",
356
+ "url": "https://www.facebook.com/sharer/sharer.php?u=https%3A%2F%2Faddepto.com%2Fblog%2Fdeep-learning-architecture%2F"
357
+ },
358
+ {
359
+ "text": "Learn more about this provider",
360
+ "url": "https://soundcloud.com/pages/privacy"
361
+ },
362
+ {
363
+ "text": "About Google",
364
+ "url": "https://about.google"
365
+ },
366
+ {
367
+ "text": "Google Cloud on YouTube",
368
+ "url": "https://www.youtube.com/googlecloud"
369
+ },
370
+ {
371
+ "text": "About ScienceDirect",
372
+ "url": "https://www.elsevier.com/solutions/sciencedirect"
373
+ },
374
+ {
375
+ "text": "Follow on X",
376
+ "url": "https://x.com/googlecloud"
377
+ },
378
+ {
379
+ "text": "Shopping cart",
380
+ "url": "https://sd-cart.elsevier.com/"
381
+ },
382
+ {
383
+ "text": "",
384
+ "url": "https://www.instagram.com/simplilearn_official/"
385
+ },
386
+ {
387
+ "text": "Learn more about this provider",
388
+ "url": "https://twitter.com/en/privacy"
389
+ },
390
+ {
391
+ "text": "Learn more about this provider",
392
+ "url": "https://www.cookiebot.com/goto/privacy-policy/"
393
+ },
394
+ {
395
+ "text": "",
396
+ "url": "https://www.relx.com/"
397
+ },
398
+ {
399
+ "text": "",
400
+ "url": "https://x.com/deepsense_ai"
401
+ },
402
+ {
403
+ "text": "Add to Mendeley",
404
+ "url": "https://www.mendeley.com/reference-management/web-importer?utm_source=science_direct&utm_campaign=web_importer_cross_sell"
405
+ },
406
+ {
407
+ "text": "KnativeComponents to create Kubernetes-native cloud-based software.",
408
+ "url": "https://knative.dev/docs/"
409
+ },
410
+ {
411
+ "text": "Twitter",
412
+ "url": "https://twitter.com/SASsoftware"
413
+ },
414
+ {
415
+ "text": "",
416
+ "url": "https://www.youtube.com/user/Simplilearn"
417
+ },
418
+ {
419
+ "text": "",
420
+ "url": "https://www.youtube.com/channel/UCLfxdtB3H0JdnniBnd4DoNA"
421
+ },
422
+ {
423
+ "text": "Facebook",
424
+ "url": "https://www.facebook.com/sharer/sharer.php?u=https://deepsense.ai/blog/using-reinforcement-learning-to-improve-large-language-models/&nb=1"
425
+ },
426
+ {
427
+ "text": "Learn more about this provider",
428
+ "url": "https://www.hotjar.com/legal/policies/privacy/"
429
+ },
430
+ {
431
+ "text": "LinkedIn",
432
+ "url": "https://www.linkedin.com/sharing/share-offsite/?url=https://deepsense.ai/blog/using-reinforcement-learning-to-improve-large-language-models/&nb=1"
433
+ },
434
+ {
435
+ "text": "Twitter",
436
+ "url": "https://twitter.com/intent/tweet?text=Deep+Learning+Architecture+Examples&url=https%3A%2F%2Faddepto.com%2Fblog%2Fdeep-learning-architecture%2F"
437
+ },
438
+ {
439
+ "text": "Merchandise",
440
+ "url": "https://www.sascompanystore.com/"
441
+ },
442
+ {
443
+ "text": "Chrome Enterprise PremiumGet secure enterprise browsing with extensive endpoint visibility.",
444
+ "url": "https://chromeenterprise.google/products/chrome-enterprise-premium/"
445
+ },
446
+ {
447
+ "text": "Careers",
448
+ "url": "https://geeksforgeeks.zohorecruit.in/careers?ref=footer"
449
+ },
450
+ {
451
+ "text": "Subscribe to newsletter",
452
+ "url": "https://addepto.us20.list-manage.com/subscribe?u=06ea2fe321b296590739d716f&id=0b963312a3"
453
+ },
454
+ {
455
+ "text": "LinkedIn",
456
+ "url": "https://www.linkedin.com/shareArticle?mini=true&url=https%3A%2F%2Faddepto.com%2Fblog%2Fdeep-learning-architecture%2F&title=Deep+Learning+Architecture+Examples"
457
+ },
458
+ {
459
+ "text": "",
460
+ "url": "https://twitter.com/turingcom"
461
+ },
462
+ {
463
+ "text": "Medium",
464
+ "url": "https://medium.com/deepsense-ai"
465
+ },
466
+ {
467
+ "text": "Google Cloud Tech on YouTube",
468
+ "url": "https://www.youtube.com/googlecloudplatform"
469
+ },
470
+ {
471
+ "text": "X",
472
+ "url": "https://twitter.com/intent/tweet?text=https://deepsense.ai/blog/using-reinforcement-learning-to-improve-large-language-models/&nb=1"
473
+ },
474
+ {
475
+ "text": "https://huggingface.co/blog/rlhf",
476
+ "url": "https://huggingface.co/blog/rlhf"
477
+ },
478
+ {
479
+ "text": "https://wandb.ai/ayush-thakur/RLHF/reports/Understanding-Reinforcement-Learning-from-Human-Feedback-RLHF-Part-1\u2013VmlldzoyODk5MTIx",
480
+ "url": "https://wandb.ai/ayush-thakur/RLHF/reports/Understanding-Reinforcement-Learning-from-Human-Feedback-RLHF-Part-1--VmlldzoyODk5MTIx"
481
+ },
482
+ {
483
+ "text": "X",
484
+ "url": "https://twitter.com/deepsense_ai"
485
+ },
486
+ {
487
+ "text": "Facebook",
488
+ "url": "http://www.facebook.com/sharer.php?u=https%3A%2F%2Fwww.sas.com%2Fen_ie%2Finsights%2Farticles%2Fanalytics%2Fmachine-learning-algorithms.html"
489
+ },
490
+ {
491
+ "text": "",
492
+ "url": "mailto:?subject=Semi-Supervised%20Learning%20in%20Artificial%20Intelligence&body=Hello,%0A %0A I found this at datarobot.com and thought you might be interested. Take a look here: https%3A%2F%2Fwww.datarobot.com%2Fblog%2Fsemi-supervised-learning%2F%0A %0A Thank you."
493
+ },
494
+ {
495
+ "text": "",
496
+ "url": "https://www.linkedin.com/shareArticle?mini=true&url=https%3A%2F%2Fwww.datarobot.com%2Fblog%2Fsemi-supervised-learning%2F"
497
+ },
498
+ {
499
+ "text": "Facebook",
500
+ "url": "https://www.facebook.com/deepsenseai"
501
+ },
502
+ {
503
+ "text": "Learn more about this provider",
504
+ "url": "https://business.safety.google/privacy/"
505
+ },
506
+ {
507
+ "text": "LinkedIn",
508
+ "url": "https://www.linkedin.com/sharing/share-offsite/?url=https%3A%2F%2Fwww.sas.com%2Fen_ie%2Finsights%2Farticles%2Fanalytics%2Fmachine-learning-algorithms.html"
509
+ },
510
+ {
511
+ "text": "",
512
+ "url": "https://www.facebook.com/sharer/sharer.php?u=https%3A%2F%2Fwww.datarobot.com%2Fblog%2Fsemi-supervised-learning%2F"
513
+ },
514
+ {
515
+ "text": "video",
516
+ "url": "https://www.youtube.com/watch?v=oC7Cw3fu3gU"
517
+ },
518
+ {
519
+ "text": "Search Jobs",
520
+ "url": "https://globalcareers-sas.icims.com/jobs/intro"
521
+ },
522
+ {
523
+ "text": "",
524
+ "url": "https://geeksforgeeksapp.page.link/gfg-app"
525
+ },
526
+ {
527
+ "text": "",
528
+ "url": "https://github.com/Addepto/contextcheck"
529
+ },
530
+ {
531
+ "text": "Advertise",
532
+ "url": "https://www.elsmediakits.com"
533
+ },
534
+ {
535
+ "text": "Evaluate Your RAG-Powered Chatbots",
536
+ "url": "https://context-clue.com/contextcheck/"
537
+ },
538
+ {
539
+ "text": "Learn more about this provider",
540
+ "url": "https://www.facebook.com/policy.php/"
541
+ },
542
+ {
543
+ "text": "",
544
+ "url": "https://twitter.com/intent/tweet?url=https%3A%2F%2Fwww.datarobot.com%2Fblog%2Fsemi-supervised-learning%2F&text="
545
+ },
546
+ {
547
+ "text": "YouTube",
548
+ "url": "https://www.youtube.com/@deepsenseai"
549
+ },
550
+ {
551
+ "text": "Follow",
552
+ "url": "https://news.google.com/publications/CAAqBwgKMLTrzwsw44bnAw?hl=en-IN&gl=IN&ceid=IN%3Aen"
553
+ },
554
+ {
555
+ "text": "",
556
+ "url": "https://www.facebook.com/geeksforgeeks.org/"
557
+ },
558
+ {
559
+ "text": "Learn more about this provider",
560
+ "url": "https://vwo.com/privacy-policy/"
561
+ },
562
+ {
563
+ "text": "Learn more about this provider",
564
+ "url": "https://www.zoho.com/privacy.html"
565
+ },
566
+ {
567
+ "text": "Get the iOS App",
568
+ "url": "https://apps.apple.com/app/simplilearn/id963042747?ls=1"
569
+ },
570
+ {
571
+ "text": "",
572
+ "url": "https://www.linkedin.com/company/turingcom"
573
+ }
574
+ ],
575
+ "references": []
576
+ },
577
+ "research_tree": {
578
+ "query": "what are the different types of deep learning models",
579
+ "depth": 0,
580
+ "children": [
581
+ {
582
+ "query": "deep learning model architectures comparison",
583
+ "depth": 1,
584
+ "children": [
585
+ {
586
+ "query": "Transformer models in NLP: advancements and applications",
587
+ "depth": 2,
588
+ "children": []
589
+ },
590
+ {
591
+ "query": "Graph Neural Networks (GNNs) for social network analysis",
592
+ "depth": 2,
593
+ "children": []
594
+ },
595
+ {
596
+ "query": "Deep Reinforcement Learning with DQNs: challenges and solutions",
597
+ "depth": 2,
598
+ "children": []
599
+ }
600
+ ]
601
+ },
602
+ {
603
+ "query": "applications of semi-supervised learning in deep learning",
604
+ "depth": 1,
605
+ "children": [
606
+ {
607
+ "query": "Transformer models advancements in NLP and computer vision",
608
+ "depth": 2,
609
+ "children": []
610
+ },
611
+ {
612
+ "query": "Applications of Graph Neural Networks (GNNs) in various domains",
613
+ "depth": 2,
614
+ "children": []
615
+ },
616
+ {
617
+ "query": "Semi-supervised learning techniques for handling imbalanced datasets",
618
+ "depth": 2,
619
+ "children": []
620
+ }
621
+ ]
622
+ },
623
+ {
624
+ "query": "advanced reinforcement learning algorithms and implementations",
625
+ "depth": 1,
626
  "children": [
627
+ {
628
+ "query": "deep learning model architectures comparison RNN LSTM GRU CNN Transformer GAN",
629
+ "depth": 2,
630
+ "children": []
631
+ },
632
+ {
633
+ "query": "reinforcement learning for large language models RLHF PPO algorithm challenges",
634
+ "depth": 2,
635
+ "children": []
636
+ },
637
+ {
638
+ "query": "semi-supervised learning applications text classification clustering examples",
639
+ "depth": 2,
640
+ "children": []
641
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
642
  ]
643
+ }
644
+ ]
645
+ },
646
+ "metadata": {
647
+ "total_queries": 12,
648
+ "total_sources": 10,
649
+ "max_depth_reached": 2,
650
+ "total_tokens": 573199
651
+ }
652
  }
backend/research_node.py CHANGED
@@ -8,9 +8,6 @@ class ResearchNode:
8
  self.depth = depth
9
  self.children: List[ResearchNode] = []
10
  self.data: List[Dict[str, Any]] = []
11
- self.explored = False
12
- self.importance_score = 0.0
13
- self.timestamp = datetime.now()
14
 
15
  def add_child(self, query: str) -> 'ResearchNode':
16
  child = ResearchNode(query, parent=self, depth=self.depth + 1)
@@ -24,7 +21,7 @@ class ResearchNode:
24
  current = current.parent
25
  path.append(current.query)
26
  return list(reversed(path))
27
-
28
  def max_depth(self) -> int:
29
  if not self.children:
30
  return self.depth
@@ -34,3 +31,9 @@ class ResearchNode:
34
  if not self.children:
35
  return 0
36
  return len(self.children) + sum([child.total_children() for child in self.children])
 
 
 
 
 
 
 
8
  self.depth = depth
9
  self.children: List[ResearchNode] = []
10
  self.data: List[Dict[str, Any]] = []
 
 
 
11
 
12
  def add_child(self, query: str) -> 'ResearchNode':
13
  child = ResearchNode(query, parent=self, depth=self.depth + 1)
 
21
  current = current.parent
22
  path.append(current.query)
23
  return list(reversed(path))
24
+
25
  def max_depth(self) -> int:
26
  if not self.children:
27
  return self.depth
 
31
  if not self.children:
32
  return 0
33
  return len(self.children) + sum([child.total_children() for child in self.children])
34
+
35
def get_all_data(self) -> List[Dict[str, Any]]:
    """Collect the data records of this node and all of its descendants.

    Returns:
        A new list holding this node's own ``data`` entries followed by the
        entries of each child subtree, in child order (pre-order traversal).

    The result is accumulated in a fresh list instead of extending
    ``self.data`` in place. Aliasing ``self.data`` would append every
    descendant's records to this node's own list, so each call would
    permanently grow the node's data and repeated calls would return
    duplicated entries.
    """
    # Shallow copy: the record dicts are shared, but self.data is not mutated.
    collected = list(self.data)
    for child in self.children:
        collected.extend(child.get_all_data())
    return collected
backend/scraper.py CHANGED
@@ -8,6 +8,7 @@ from crawl4ai import AsyncWebCrawler, BrowserConfig, CacheMode
8
  import newspaper
9
  from newspaper import Article
10
  import requests
 
11
 
12
 
13
  class WebScraper:
@@ -170,6 +171,7 @@ class CrawlForAIScraper:
170
  async def start(self):
171
  if not self._is_started:
172
  await self.crawler.start()
 
173
  self._is_started = True
174
 
175
  async def close(self):
@@ -178,12 +180,14 @@ class CrawlForAIScraper:
178
  self._is_started = False
179
 
180
  async def search_and_scrape(self, query: str, num_sites: int = 3) -> List[Dict[str, Any]]:
181
- if not self._is_started:
182
- await self.start()
183
  self.logger.info(f"Starting search for: {query}")
184
- search_results = self._google_search(query, num_sites)
 
 
185
  self.logger.info(f"Found {len(search_results)} search results")
186
 
 
187
  scraped_data = []
188
  for idx, url in enumerate(search_results):
189
  try:
@@ -196,59 +200,110 @@ class CrawlForAIScraper:
196
  self.logger.error(f"Error scraping {url}: {str(e)}")
197
  continue
198
 
199
- await self.crawler.close()
200
  self.logger.info(f"Completed scraping {len(scraped_data)} sites")
201
  return scraped_data
202
 
203
  async def _google_search(self, query: str, num_results: int) -> List[str]:
204
- pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
205
 
206
  async def _scrape_page(self, url: str) -> Dict[str, Any]:
207
- if not self._is_started:
208
- await self.start()
209
 
210
  try:
211
  # Run the crawler on a URL
212
- result = await self.crawler.arun(url=url, screenshot=False, cache_mode=CacheMode.BYPASS)
213
  soup = BeautifulSoup(result.html, "html.parser")
214
  data = {
215
  "url": url,
216
  "text": result.markdown,
217
- "images": self._extract_images(soup),
218
- "videos": result.media["videos"],
219
- "links": result.links,
220
  }
221
 
222
  return data
223
 
224
  except Exception as e:
225
- # self.logger.error(f"Scraping error for {url}: {str(e)}")
226
- raise e
227
  return {}
228
 
229
- def _extract_text(self, soup: BeautifulSoup) -> str:
230
- pass
231
-
232
- def _extract_images(self, soup: BeautifulSoup) -> List[str]:
233
- images = [img['src'] for img in soup.find_all('img') if 'src' in img.attrs and int(img.get('width', 0)) > 300 and int(img.get('height', 0)) > 300 and 'pixel' not in img['src'] and 'icon' not in img['src']]
234
- images = sorted(images, key=lambda src: -1 * (int(soup.find('img', {'src': src}).get('width', 0)) * int(soup.find('img', {'src': src}).get('height', 0))))
 
 
 
 
 
 
 
 
 
 
 
235
  return images
236
 
237
  def _extract_videos(self, soup: BeautifulSoup) -> List[str]:
238
- pass
239
-
240
- def _extract_links(self, soup: BeautifulSoup) -> List[str]:
241
- pass
242
-
243
- def _merge_extraction_results(self, news_data: Dict, selenium_data: Dict) -> Dict[str, Any]:
244
- pass
 
 
 
 
 
 
 
 
 
 
245
 
246
 
247
  if __name__ == "__main__":
 
 
 
 
248
  async def main():
249
  scraper = CrawlForAIScraper()
250
  await scraper.start()
251
- data = await scraper._scrape_page("https://www.videojamai.com")
252
  await scraper.close()
 
 
253
  print(json.dumps(data, indent=2))
254
  asyncio.run(main())
 
8
  import newspaper
9
  from newspaper import Article
10
  import requests
11
+ import time
12
 
13
 
14
  class WebScraper:
 
171
async def start(self):
    """Start the underlying crawler once; later calls are no-ops.

    After the crawler reports that it has started, the coroutine pauses for
    one second before marking the scraper as started.
    """
    if self._is_started:
        return
    await self.crawler.start()
    # Use the asyncio sleep: time.sleep() here would freeze the whole event
    # loop (and every other coroutine served by it) for the full second.
    await asyncio.sleep(1)
    self._is_started = True
176
 
177
  async def close(self):
 
180
  self._is_started = False
181
 
182
  async def search_and_scrape(self, query: str, num_sites: int = 3) -> List[Dict[str, Any]]:
183
+ await self.start()
 
184
  self.logger.info(f"Starting search for: {query}")
185
+
186
+ # Perform a Google search to get a list of webpages
187
+ search_results = await self._google_search(query, num_sites)
188
  self.logger.info(f"Found {len(search_results)} search results")
189
 
190
+ # Scrape each webpage
191
  scraped_data = []
192
  for idx, url in enumerate(search_results):
193
  try:
 
200
  self.logger.error(f"Error scraping {url}: {str(e)}")
201
  continue
202
 
 
203
  self.logger.info(f"Completed scraping {len(scraped_data)} sites")
204
  return scraped_data
205
 
206
async def _google_search(self, query: str, num_results: int) -> List[str]:
    """Fetch a Google results page for ``query`` and return result URLs.

    Args:
        query: Free-text search query; it is URL-encoded before use.
        num_results: Collection stops once this many URLs have been gathered.

    Returns:
        Distinct URLs in page order. Any failure is logged and yields an
        empty list instead of propagating.
    """
    self.logger.info("Performing Google search...")
    try:
        search_uri = f"https://www.google.com/search?q={quote_plus(query)}"

        result = await self.crawler.arun(
            url=search_uri,
            screenshot=False,
            cache_mode=CacheMode.BYPASS,
            delay_before_return_html=2,
            page_timeout=25000,
            scan_full_page=True,
        )

        soup = BeautifulSoup(result.html, "html.parser")
        search_results: List[str] = []

        # NOTE(review): the first two matches are skipped, presumably because
        # they are not organic results - confirm against the live markup.
        for link in soup.select("div > span > a")[2:]:
            href = link.get("href")
            if not href:
                # An anchor without href used to raise and discard every result.
                continue
            url = "".join(href.split())
            if url.startswith(("/", "#")):
                # Site-relative and fragment links cannot be made absolute by
                # prefixing a scheme; they are not search results.
                continue
            if not url.startswith(("http://", "https://")):
                url = "https://" + url
            if url in search_results:
                continue
            search_results.append(url)
            if len(search_results) >= num_results:
                break

        self.logger.info(f"Found {len(search_results)} URLs")
        return search_results

    except Exception as e:
        # Best-effort by design: callers treat an empty list as "no results".
        self.logger.error(f"Google search error: {str(e)}")
        return []
234
 
235
async def _scrape_page(self, url: str) -> Dict[str, Any]:
    """Crawl one page and return its text, images, videos and external links.

    Args:
        url: Address of the page to crawl.

    Returns:
        A dict with the keys ``url``, ``text``, ``images``, ``videos`` and
        ``links``. If anything fails, the error is logged and an empty dict
        is returned.
    """
    await self.start()

    try:
        # Fetch the page, bypassing the crawler cache.
        crawl = await self.crawler.arun(
            url=url,
            screenshot=False,
            cache_mode=CacheMode.BYPASS,
            delay_before_return_html=2,
            page_timeout=25000,
            scan_full_page=True,
        )
        parsed = BeautifulSoup(crawl.html, "html.parser")
        return {
            "url": url,
            "text": crawl.markdown,
            "images": self._extract_images(parsed, crawl.url),
            "videos": self._extract_videos(parsed),
            "links": crawl.links["external"],
        }
    except Exception as exc:
        self.logger.error(f"Scraping error for {url}: {str(exc)}")
        return {}
256
 
257
+ def _extract_images(self, soup: BeautifulSoup, url: str) -> List[str]:
258
+ # Extract images with width and height greater than 300 pixels
259
+ images = []
260
+ for img in soup.find_all('img'):
261
+ if 'src' in img.attrs:
262
+ src = img['src']
263
+ # remove px or any characters from width and height
264
+ width = int(''.join(filter(str.isdigit, img.get('width', '0'))))
265
+ height = int(''.join(filter(str.isdigit, img.get('height', '0'))))
266
+ if width > 300 and height > 300 and 'pixel' not in src and 'icon' not in src:
267
+ images.append((src, width, height))
268
+ images = sorted(images, key=lambda img: -1 * (img[1] * img[2]))
269
+ images = [img[0] for img in images]
270
+
271
+ # Add base URL to relative URLs
272
+ base_url = '/'.join(url.split('/')[:3])
273
+ images = [img if img.startswith('http') else base_url + img for img in images]
274
  return images
275
 
276
  def _extract_videos(self, soup: BeautifulSoup) -> List[str]:
277
+ # Extract videos from iframes and video tags
278
+ videos = []
279
+ nodes = list(soup.find_all('iframe')) + list(soup.find_all('video')) + list(soup.find_all('a'))
280
+ for node in nodes:
281
+ if node.name == 'iframe':
282
+ src = node.get('src', '')
283
+ if 'youtube.com' in src or 'youtu.be' in src:
284
+ videos.append(src)
285
+ elif node.name == 'video':
286
+ src = node.get('src', '')
287
+ if 'youtube.com' in src or 'youtu.be' in src:
288
+ videos.append(src)
289
+ elif node.name == 'a':
290
+ href = node.get('href', '')
291
+ if 'youtube.com' in href or 'youtu.be' in href:
292
+ videos.append(href)
293
+ return videos
294
 
295
 
296
if __name__ == "__main__":
    # Manual smoke test: run one search-and-scrape, save the result to
    # output.json and echo it to stdout.
    import sys

    # NOTE(review): `url` is resolved from the command line but nothing below
    # uses it - main() always runs the fixed search query. Either pass it to
    # the scraper or drop it.
    url = "https://docs.anthropic.com/en/docs/agents-and-tools/claude-code/overview"
    if len(sys.argv) > 1:
        url = sys.argv[1]

    async def main():
        scraper = CrawlForAIScraper()
        await scraper.start()
        try:
            data = await scraper.search_and_scrape("what is ai")
        finally:
            # Always release the crawler, even when scraping raises.
            await scraper.close()

        # Serialize once and reuse the text for both outputs.
        serialized = json.dumps(data, indent=2)
        with open("output.json", "w") as f:
            f.write(serialized)
        print(serialized)

    asyncio.run(main())