Soham Waghmare committed on
Commit
63a0765
·
1 Parent(s): 02298d2

feat: refactor, abstract, simplify

Browse files
Files changed (5) hide show
  1. .gitignore +1 -0
  2. backend/app.py +8 -3
  3. backend/knet.py +151 -177
  4. backend/research_node.py +6 -3
  5. backend/scraper.py +1 -1
.gitignore CHANGED
@@ -10,6 +10,7 @@ backend/.venv/
10
  backend/.env*
11
  backend/downloads/*
12
  backend/output.json
 
13
 
14
  # Next.js ignore files
15
  # See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
 
10
  backend/.env*
11
  backend/downloads/*
12
  backend/output.json
13
+ backend/.ruff_cache/
14
 
15
  # Next.js ignore files
16
  # See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
backend/app.py CHANGED
@@ -31,7 +31,12 @@ app.add_middleware(
31
  allow_headers=["*"],
32
  )
33
 
34
- sio = socketio.AsyncServer(cors_allowed_origins=CORS_ALLOWED_ORIGINS, ping_timeout=120, ping_interval=10, async_mode="asgi")
 
 
 
 
 
35
  app.mount("/", socketio.ASGIApp(sio))
36
 
37
 
@@ -78,7 +83,7 @@ async def health_check(sid, data):
78
  @sio.event
79
  async def start_research(sid, data):
80
  try:
81
- data = json.loads(data) if type(data) != dict else data
82
  topic = data.get("topic")
83
  max_depth: int = data.get("max_depth")
84
  max_breadth: int = data.get("max_breadth")
@@ -114,7 +119,7 @@ async def start_research(sid, data):
114
  async def test(sid, data):
115
  knet, _ = await session_manager.get_or_create_session(sid)
116
  print("Testing...")
117
- data = json.loads(data) if type(data) != dict else data
118
  res = await knet.scraper._scrape_page(data["url"])
119
  print(json.dumps(res, indent=2))
120
  await sio.emit("test", res, room=sid)
 
31
  allow_headers=["*"],
32
  )
33
 
34
+ sio = socketio.AsyncServer(
35
+ cors_allowed_origins=CORS_ALLOWED_ORIGINS,
36
+ ping_timeout=120,
37
+ ping_interval=10,
38
+ async_mode="asgi",
39
+ )
40
  app.mount("/", socketio.ASGIApp(sio))
41
 
42
 
 
83
  @sio.event
84
  async def start_research(sid, data):
85
  try:
86
+ data = json.loads(data) if type(data) is not dict else data
87
  topic = data.get("topic")
88
  max_depth: int = data.get("max_depth")
89
  max_breadth: int = data.get("max_breadth")
 
119
  async def test(sid, data):
120
  knet, _ = await session_manager.get_or_create_session(sid)
121
  print("Testing...")
122
+ data = json.loads(data) if type(data) is not dict else data
123
  res = await knet.scraper._scrape_page(data["url"])
124
  print(json.dumps(res, indent=2))
125
  await sio.emit("test", res, room=sid)
backend/knet.py CHANGED
@@ -16,84 +16,9 @@ from research_node import ResearchNode
16
  load_dotenv()
17
 
18
 
19
- class ResearchProgress:
20
- def __init__(self, callback):
21
- self.progress = 0
22
- self.callback = callback
23
-
24
- async def update(self, progress: int, message: str):
25
- self.progress += progress
26
- if self.progress > 100:
27
- self.progress = 100
28
- if self.callback:
29
- await self.callback({"progress": self.progress, "message": message})
30
-
31
-
32
- class KNet:
33
- def __init__(self, scraper_instance, max_depth: int = 1, max_breadth: int = 1, num_sites_per_query: int = 5):
34
- self.api_key = os.getenv("GOOGLE_API_KEY")
35
- assert self.api_key, "Google API key is required"
36
-
37
- # Initialize Google GenAI
38
- genai.configure(api_key=self.api_key)
39
-
40
- # Keep both models with original configurations
41
- generation_config = {"temperature": 0.9}
42
- safe = [
43
- {
44
- "category": "HARM_CATEGORY_DANGEROUS",
45
- "threshold": "BLOCK_NONE",
46
- },
47
- {
48
- "category": "HARM_CATEGORY_HARASSMENT",
49
- "threshold": "BLOCK_NONE",
50
- },
51
- {
52
- "category": "HARM_CATEGORY_HATE_SPEECH",
53
- "threshold": "BLOCK_NONE",
54
- },
55
- {
56
- "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
57
- "threshold": "BLOCK_NONE",
58
- },
59
- {
60
- "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
61
- "threshold": "BLOCK_NONE",
62
- },
63
- ]
64
- self.llm = genai.GenerativeModel(
65
- "gemini-2.0-flash-lite-preview-02-05",
66
- generation_config=generation_config,
67
- safety_settings=safe,
68
- )
69
- self.ctx_researcher = []
70
-
71
- self.research_manager = genai.GenerativeModel(
72
- "gemini-2.0-flash-lite-preview-02-05",
73
- generation_config=generation_config,
74
- safety_settings=safe,
75
- )
76
- self.ctx_manager = []
77
-
78
- # Initialize scraper
79
- self.scraper = scraper_instance
80
-
81
- self.logger = logging.getLogger(__name__)
82
- self.max_depth = max_depth
83
- self.max_breadth = max_breadth
84
- self.num_sites_per_query = num_sites_per_query
85
-
86
- self.search_prompt = """Generate 3-5 specific search queries to research the following topic: {topic}
87
-
88
- Requirements:
89
- 1. Queries should cover different aspects of the topic
90
- 2. Be specific and technical
91
- 3. Include key terms and concepts
92
- 4. Format each query on a new line
93
- 5. Return only the queries, no explanations"""
94
-
95
- self.token_count = 0
96
- self.branch_decision_prompt = """Given the current research context and findings, should we explore this branch deeper?
97
 
98
  Current Topic: {query}
99
  Current Depth: {depth}
@@ -107,10 +32,24 @@ class KNet:
107
  3. Depth vs breadth tradeoff
108
  4. Information saturation
109
 
110
- Return only: decision: true/false"""
 
 
 
 
 
 
 
 
 
111
 
112
- # Simplified decision schema for branching
113
- self.branch_schema = {
 
 
 
 
 
114
  "response_schema": content.Schema(
115
  type=content.Type.OBJECT,
116
  required=["decision"],
@@ -121,8 +60,7 @@ class KNet:
121
  "response_mime_type": "application/json",
122
  }
123
 
124
- # Analysis schema
125
- self.analysis_schema = {
126
  "response_schema": content.Schema(
127
  type=content.Type.OBJECT,
128
  required=["branches"],
@@ -143,55 +81,63 @@ class KNet:
143
  "response_mime_type": "application/json",
144
  }
145
 
146
- def _track_tokens(self, tokens: int) -> None:
147
- self.token_count += tokens
148
 
149
- def _should_branch_deeper(self, node: ResearchNode, topic: str, retry_count: int = 0) -> bool:
150
- try:
151
- if node.depth > self.max_depth:
152
- return False
153
 
154
- # Generate summary of key findings into research_manager's context
155
- if node.data:
156
- findings = ("\n" + "-" * 10 + "Next data" + "-" * 10 + "\n").join([json.dumps(d, indent=2) for d in node.data])
157
- response = self.llm.generate_content(
158
- f"Extract key findings from the following data related to the topic '{topic}':\n{findings}"
159
- )
160
- self._track_tokens(response.usage_metadata.total_token_count)
161
- findings = response.text
162
- self.ctx_manager.append(findings)
163
 
164
- # Research manager takes decision to proceed or not
165
- prompt = self.branch_decision_prompt.format(
166
- query=node.query,
167
- depth=node.depth,
168
- path=" -> ".join(node.get_path_to_root()),
169
- findings="\n".join(self.ctx_manager),
170
- )
171
- response = self.research_manager.generate_content(prompt, generation_config={**self.branch_schema})
172
- self._track_tokens(response.usage_metadata.total_token_count)
173
- result = json.loads(response.text)
174
- self.logger.info(f"Branch decision for '{node.query}': {result['decision']}")
175
 
176
- return result["decision"]
177
- except Exception as e:
178
- if result["candidates"][0]["finishReason"] == "RECITATION":
179
- self.logger.error(f"Retrying branch decision: {str(e)}\nC:{retry_count / 3}")
180
- self._should_branch_deeper(node, topic, retry_count + 1)
181
- self.logger.error(f"Branch decision failed: {str(e)}")
182
- raise e
183
-
184
- async def conduct_research(
185
- self, topic: str, progress_callback, max_depth: int, max_breadth: int, num_sites_per_query: int
186
- ) -> Dict[str, Any]:
 
 
 
 
 
 
 
 
 
 
 
 
187
  self.max_depth = max_depth
188
  self.max_breadth = max_breadth
189
  self.num_sites_per_query = num_sites_per_query
190
 
 
 
 
 
 
 
 
 
 
 
 
 
 
191
  self.ctx_researcher = []
192
  self.ctx_manager = []
193
  self.token_count = 0
194
- progress = ResearchProgress(progress_callback)
195
  self.logger.info(f"Starting research on topic: {topic}")
196
 
197
  try:
@@ -219,8 +165,8 @@ class KNet:
219
 
220
  # Only branch if we have data and haven't reached max depth
221
  if current_node.data and current_depth < self.max_depth:
222
- if self._should_branch_deeper(current_node, topic):
223
- new_branches = self._analyze_and_branch(current_node, topic)
224
  for branch in new_branches:
225
  to_explore.append((branch, current_depth + 1))
226
  self.logger.info(f"Added {len(new_branches)} new branch(es) at depth {current_depth + 1}")
@@ -236,52 +182,11 @@ class KNet:
236
  json.dump(final_report, f, indent=2)
237
  return final_report
238
 
239
- except Exception as e:
240
- self.logger.error(f"Research failed: {str(e)}")
241
- raise e
242
-
243
- def _analyze_and_branch(self, node: ResearchNode, topic: str, retry_count: int = 0) -> List[ResearchNode]:
244
- try:
245
- if not node.data or node.depth > self.max_depth:
246
- return []
247
-
248
- analysis_prompt = dedent(
249
- f"""Based on the following findings about "{topic}", suggest new research directions.
250
- Findings:
251
- {json.dumps(self.ctx_manager, indent=2)}
252
 
253
- Suggest up to {self.max_breadth} specific google search queries that would help data which:
254
- - Builds upon these findings
255
- - Explores different aspects
256
- - Goes deeper into important details
257
-
258
- Return as JSON array of objects with properties:
259
- - query (string)"""
260
- )
261
-
262
- response = self.research_manager.generate_content(analysis_prompt, generation_config={**self.analysis_schema})
263
- self._track_tokens(response.usage_metadata.total_token_count)
264
- result = json.loads(response.text)
265
- self.logger.info(f"New branches for '{node.query}': {result['branches']}")
266
-
267
- # Add children to current node
268
- # +> child1
269
- # node - +> child2
270
- # +> child3
271
- new_nodes = []
272
- for branch in result.get("branches", []):
273
- child_node = node.add_child(branch["query"])
274
- new_nodes.append(child_node)
275
- return new_nodes
276
-
277
- except Exception as e:
278
- if result["candidates"][0]["finishReason"] == "RECITATION" and retry_count <= 3:
279
- self.logger.error(f"Retrying analysis: {str(e)}\nC:{retry_count / 3}")
280
- self._analyze_and_branch(node, topic, retry_count + 1)
281
- self.logger.error(f"Branch analysis failed: {str(e)}")
282
- raise e
283
-
284
- def _generate_final_report(self, root_node: ResearchNode, retry_count: int = 0) -> Dict[str, Any]:
285
  try:
286
  findings = "\n".join(self.ctx_manager)
287
  with open("output.json", "w") as f:
@@ -289,8 +194,7 @@ class KNet:
289
  prompt = f"""Generate a comprehensive report on the topic "{root_node.query}" based on the following research findings:
290
  {findings}
291
  """
292
- response = self.research_manager.generate_content(prompt)
293
- self._track_tokens(response.usage_metadata.total_token_count)
294
 
295
  # Collate multimedia content
296
  media_content = {"images": [], "videos": [], "links": [], "references": []}
@@ -301,8 +205,8 @@ class KNet:
301
  if data.get("videos"):
302
  media_content["videos"].extend(data["videos"])
303
  if data.get("links"):
304
- media_content["links"].extend([{"url": l["href"], "text": l["text"]} for l in data["links"]])
305
- # Deduplicate
306
  media_content["images"] = list(set(media_content["images"]))
307
  media_content["videos"] = list(set(media_content["videos"]))
308
  media_content["links"] = list({json.dumps(d, sort_keys=True) for d in media_content["links"]})
@@ -324,7 +228,7 @@ class KNet:
324
  return {
325
  "topic": root_node.query,
326
  "timestamp": datetime.now().isoformat(),
327
- "content": response.text,
328
  "media": media_content,
329
  "research_tree": build_tree_structure(root_node),
330
  "metadata": {
@@ -334,9 +238,79 @@ class KNet:
334
  "total_tokens": self.token_count,
335
  },
336
  }
 
337
  except Exception as e:
338
- if response["candidates"][0]["finishReason"] == "RECITATION":
339
- self.logger.error(f"Retrying final report: {str(e)}\nC:{retry_count / 3}")
340
  self._generate_final_report(root_node, retry_count + 1)
341
- self.logger.error(f"Error generating final report: {str(e)}")
342
- raise e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  load_dotenv()
17
 
18
 
19
+ class Prompt:
20
+ def __init__(self) -> None:
21
+ self.continue_branch = dedent("""Given the current research context and findings, should we explore this branch deeper?
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
  Current Topic: {query}
24
  Current Depth: {depth}
 
32
  3. Depth vs breadth tradeoff
33
  4. Information saturation
34
 
35
+ Return only: decision: true/false""")
36
+
37
+ self.search_query = dedent("""Based on the following findings about "{topic}", suggest new research directions.
38
+ Findings:
39
+ {ctx_manager}
40
+
41
+ Suggest up to {max_breadth} specific google search queries that would help data which:
42
+ - Builds upon these findings
43
+ - Explores different aspects
44
+ - Goes deeper into important details
45
 
46
+ Return as JSON array of objects with properties:
47
+ - query (string)""")
48
+
49
+
50
+ class Schema:
51
+ def __init__(self) -> None:
52
+ self.continue_branch = {
53
  "response_schema": content.Schema(
54
  type=content.Type.OBJECT,
55
  required=["decision"],
 
60
  "response_mime_type": "application/json",
61
  }
62
 
63
+ self.search_query = {
 
64
  "response_schema": content.Schema(
65
  type=content.Type.OBJECT,
66
  required=["branches"],
 
81
  "response_mime_type": "application/json",
82
  }
83
 
 
 
84
 
85
+ class ResearchProgress:
86
+ def __init__(self, callback):
87
+ self.progress = 0
88
+ self.callback = callback
89
 
90
+ async def update(self, progress: int, message: str):
91
+ self.progress += progress
92
+ if self.progress > 100:
93
+ self.progress = 100
94
+ if self.callback:
95
+ await self.callback({"progress": self.progress, "message": message})
 
 
 
96
 
 
 
 
 
 
 
 
 
 
 
 
97
 
98
+ class KNet:
99
+ def __init__(self, scraper_instance, max_depth: int = 1, max_breadth: int = 1, num_sites_per_query: int = 5):
100
+ self.api_key = os.getenv("GOOGLE_API_KEY")
101
+ assert self.api_key, "Google API key is required"
102
+ self.scraper = scraper_instance
103
+ self.logger = logging.getLogger(__name__)
104
+ self.prompt = Prompt()
105
+ self.schema = Schema()
106
+
107
+ # Init Agents' Base Model
108
+ genai.configure(api_key=self.api_key)
109
+ generation_config = {"temperature": 0.9}
110
+ safe = [
111
+ {"category": "HARM_CATEGORY_DANGEROUS", "threshold": "BLOCK_NONE"},
112
+ {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"},
113
+ {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"},
114
+ {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_NONE"},
115
+ {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"},
116
+ ]
117
+ self.researcher = genai.GenerativeModel("gemini-2.0-flash", generation_config=generation_config, safety_settings=safe)
118
+ self.research_manager = genai.GenerativeModel("gemini-2.0-flash", generation_config=generation_config, safety_settings=safe)
119
+
120
+ # Parameters
121
  self.max_depth = max_depth
122
  self.max_breadth = max_breadth
123
  self.num_sites_per_query = num_sites_per_query
124
 
125
+ # Global State
126
+ self.ctx_researcher: list[str] = []
127
+ self.ctx_manager: list[str] = []
128
+ self.token_count: int = 0
129
+
130
+ async def conduct_research(self, topic: str, progress_callback, max_depth: int, max_breadth: int, num_sites_per_query: int) -> dict:
131
+ # Local Runtime State
132
+ progress = ResearchProgress(progress_callback)
133
+ self.max_depth = max_depth
134
+ self.max_breadth = max_breadth
135
+ self.num_sites_per_query = num_sites_per_query
136
+
137
+ # Reset global state
138
  self.ctx_researcher = []
139
  self.ctx_manager = []
140
  self.token_count = 0
 
141
  self.logger.info(f"Starting research on topic: {topic}")
142
 
143
  try:
 
165
 
166
  # Only branch if we have data and haven't reached max depth
167
  if current_node.data and current_depth < self.max_depth:
168
+ if self._should_continue_branch(current_node, topic):
169
+ new_branches = self._gen_queries(current_node, topic)
170
  for branch in new_branches:
171
  to_explore.append((branch, current_depth + 1))
172
  self.logger.info(f"Added {len(new_branches)} new branch(es) at depth {current_depth + 1}")
 
182
  json.dump(final_report, f, indent=2)
183
  return final_report
184
 
185
+ except Exception:
186
+ self.logger.error("Research failed", exc_info=True)
187
+ raise
 
 
 
 
 
 
 
 
 
 
188
 
189
+ def _generate_final_report(self, root_node: ResearchNode, retry_count: int = 1) -> Dict[str, Any]:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
190
  try:
191
  findings = "\n".join(self.ctx_manager)
192
  with open("output.json", "w") as f:
 
194
  prompt = f"""Generate a comprehensive report on the topic "{root_node.query}" based on the following research findings:
195
  {findings}
196
  """
197
+ response = self.generate_content(prompt)
 
198
 
199
  # Collate multimedia content
200
  media_content = {"images": [], "videos": [], "links": [], "references": []}
 
205
  if data.get("videos"):
206
  media_content["videos"].extend(data["videos"])
207
  if data.get("links"):
208
+ media_content["links"].extend([{"url": link["href"], "text": link["text"]} for link in data["links"]])
209
+ # Dedupe
210
  media_content["images"] = list(set(media_content["images"]))
211
  media_content["videos"] = list(set(media_content["videos"]))
212
  media_content["links"] = list({json.dumps(d, sort_keys=True) for d in media_content["links"]})
 
228
  return {
229
  "topic": root_node.query,
230
  "timestamp": datetime.now().isoformat(),
231
+ "content": response,
232
  "media": media_content,
233
  "research_tree": build_tree_structure(root_node),
234
  "metadata": {
 
238
  "total_tokens": self.token_count,
239
  },
240
  }
241
+
242
  except Exception as e:
243
+ if e == "GEMINI_RECITATION" and retry_count < 3:
244
+ self.logger.error(f"Retrying final report:C:{retry_count / 3}", exc_info=True)
245
  self._generate_final_report(root_node, retry_count + 1)
246
+ self.logger.error("Error generating final report", exc_info=True)
247
+ raise
248
+
249
+ def _gen_queries(self, node: ResearchNode, topic: str, retry_count: int = 1) -> List[ResearchNode]:
250
+ try:
251
+ if not node.data or node.depth > self.max_depth:
252
+ return []
253
+
254
+ prompt = self.prompt.search_query.format(topic=topic, ctx_manager=json.dumps(self.ctx_manager, indent=2), max_breadth=self.max_breadth)
255
+ response = self.generate_content(prompt, generation_config=self.schema.search_query)
256
+ self.logger.info(f"New branches for '{node.query}': {response['branches']}")
257
+
258
+ # Add children to current node
259
+ # |-> child
260
+ # node -|-> child
261
+ # |-> child
262
+ new_nodes = []
263
+ for branch in response.get("branches", []):
264
+ child_node = node.add_child(branch["query"])
265
+ new_nodes.append(child_node)
266
+ return new_nodes
267
+
268
+ except Exception as e:
269
+ if e == "GEMINI_RECITATION" and retry_count < 3:
270
+ self.logger.error(f"Retrying analysis:C:{retry_count / 3}", exc_info=True)
271
+ self._gen_queries(node, topic, retry_count + 1)
272
+ self.logger.error("Branch analysis failed:", exc_info=True)
273
+ raise
274
+
275
+ def _should_continue_branch(self, node: ResearchNode, topic: str, retry_count: int = 1) -> bool:
276
+ try:
277
+ if node.depth > self.max_depth:
278
+ return False
279
+
280
+ # Generate summary of key findings into the manager's context
281
+ if node.data:
282
+ findings = ("\n" + "-" * 10 + "Next data" + "-" * 10 + "\n").join([json.dumps(d, indent=2) for d in node.data])
283
+ response = self.generate_content(f"Extract key findings from the following data related to the topic '{topic}':\n{findings}")
284
+ self.ctx_manager.append(response)
285
+
286
+ # Research manager takes decision to proceed or not
287
+ prompt = self.prompt.continue_branch.format(
288
+ query=node.query,
289
+ depth=node.depth,
290
+ path=" -> ".join(node.get_path_to_root()),
291
+ findings="\n".join(self.ctx_manager),
292
+ )
293
+ response = self.generate_content(prompt, generation_config=self.schema.continue_branch)
294
+ self.logger.info(f"Branch decision for '{node.query}': {response['decision']}")
295
+
296
+ return response["decision"]
297
+
298
+ except Exception as e:
299
+ if e == "GEMINI_RECITATION" and retry_count < 3:
300
+ self.logger.error(f"Retrying branch decision:C:{retry_count / 3}", exc_info=True)
301
+ self._should_continue_branch(node, topic, retry_count + 1)
302
+ self.logger.error("Branch decision failed:", exc_info=True)
303
+ raise
304
+
305
+ def generate_content(self, prompt: str, generation_config: Dict[str, Any] = {}) -> Dict[str, Any] | str:
306
+ try:
307
+ response = self.researcher.generate_content(prompt, generation_config=generation_config)
308
+ self.token_count += response.usage_metadata.total_token_count
309
+ if generation_config:
310
+ return json.loads(response.text)
311
+ return response.text
312
+
313
+ except Exception:
314
+ if response["candidates"][0]["finishReason"] == "RECITATION":
315
+ raise Exception("GEMINI_RECITATION")
316
+ raise
backend/research_node.py CHANGED
@@ -1,10 +1,11 @@
1
  import copy
2
- from datetime import datetime
3
  from typing import Any, Dict, List, Optional
4
 
5
 
6
  class ResearchNode:
7
- def __init__(self, query: str, parent: Optional["ResearchNode"] = None, depth: int = 0):
 
 
8
  self.query = query
9
  self.parent = parent
10
  self.depth = depth
@@ -32,7 +33,9 @@ class ResearchNode:
32
  def total_children(self) -> int:
33
  if not self.children:
34
  return 0
35
- return len(self.children) + sum([child.total_children() for child in self.children])
 
 
36
 
37
  def get_all_data(self) -> List[Dict[str, Any]]:
38
  data = copy.deepcopy(self.data)
 
1
  import copy
 
2
  from typing import Any, Dict, List, Optional
3
 
4
 
5
  class ResearchNode:
6
+ def __init__(
7
+ self, query: str, parent: Optional["ResearchNode"] = None, depth: int = 0
8
+ ):
9
  self.query = query
10
  self.parent = parent
11
  self.depth = depth
 
33
  def total_children(self) -> int:
34
  if not self.children:
35
  return 0
36
+ return len(self.children) + sum(
37
+ [child.total_children() for child in self.children]
38
+ )
39
 
40
  def get_all_data(self) -> List[Dict[str, Any]]:
41
  data = copy.deepcopy(self.data)
backend/scraper.py CHANGED
@@ -267,7 +267,7 @@ class CrawlForAIScraper:
267
  for img in soup.find_all("img"):
268
  if "src" in img.attrs:
269
  src = img["src"]
270
- if not "width" or not "height" in img.attrs:
271
  continue
272
  if "width" in img.attrs and img.get("width").lower() == "auto":
273
  images.append((src, 999, 0))
 
267
  for img in soup.find_all("img"):
268
  if "src" in img.attrs:
269
  src = img["src"]
270
+ if not "width" or "height" not in img.attrs:
271
  continue
272
  if "width" in img.attrs and img.get("width").lower() == "auto":
273
  images.append((src, 999, 0))