Lui3ui3ui committed on
Commit
5dfa96c
Β·
verified Β·
1 Parent(s): 0e93ba2

Upload 2 files

Browse files
Files changed (2) hide show
  1. agents.py +478 -374
  2. app.py +30 -14
agents.py CHANGED
@@ -1,374 +1,478 @@
1
- from langgraph.graph import StateGraph, END
2
- from search import duckduckgo_search
3
- import asyncio
4
- import re
5
- import json
6
- import asyncio
7
- import httpx
8
- import os
9
- import ast
10
- import traceback
11
- from huggingface_hub import InferenceClient
12
-
13
- # Create a single shared client
14
- # It will read your HUGGINGFACEHUB_API_TOKEN from the env for authentication
15
- client = InferenceClient(token=os.getenv("HF_API_TOKEN"))
16
-
17
- async def hf_chat(model: str, messages: list[dict]):
18
- loop = asyncio.get_running_loop()
19
-
20
- def _sync_call():
21
- # Ensure you have initialized the client with your HF_API_TOKEN
22
- return client.chat.completions.create(
23
- model=model,
24
- messages=messages,
25
- # you can pass generation params here too
26
- # temperature=0.7, max_tokens=512, ...
27
- )
28
-
29
- completion = await loop.run_in_executor(None, _sync_call)
30
-
31
- return {
32
- "message": {
33
- "role": completion.choices[0].message.role,
34
- "content": completion.choices[0].message.content
35
- }
36
- }
37
-
38
- # Alias `chat` to your HF-backed version
39
- chat = hf_chat
40
-
41
- class AsyncLogger:
42
- def __init__(self):
43
- self._log = []
44
- self._lock = asyncio.Lock()
45
-
46
- async def log(self, message):
47
- async with self._lock:
48
- self._log.append(message)
49
-
50
- async def get_log(self):
51
- async with self._lock:
52
- return "\n".join(self._log)
53
-
54
- async def clear(self):
55
- async with self._lock:
56
- self._log.clear()
57
-
58
- logger = AsyncLogger()
59
-
60
- def extract_json_array(text: str):
61
- # Remove Markdown/HTML formatting
62
- text = re.sub(r"```(?:json)?\n?|</?(?:pre|code|p)>", "", text, flags=re.IGNORECASE)
63
-
64
- # Extract the first [...] block
65
- match = re.search(r"(\[\s*{.*?}\s*\])", text, re.DOTALL)
66
- if not match:
67
- return []
68
-
69
- json_str = match.group(1)
70
-
71
- # Try parsing as JSON
72
- try:
73
- return json.loads(json_str)
74
- except json.JSONDecodeError as e:
75
- print("[extract_json_array] JSON decode error:", e)
76
-
77
- # Fallback: try ast.literal_eval
78
- try:
79
- return ast.literal_eval(json_str)
80
- except Exception as e2:
81
- print("[extract_json_array] literal_eval failed:", e2)
82
- return []
83
-
84
- # Node 1: Extract books from user input
85
- async def extract_books_node(state):
86
- try:
87
- print("[extract_books_node] πŸ‘‰ enter")
88
- user_input = state.get("user_input", "")
89
- prompt = (
90
- "Extract all book titles and authors from the user input. Do not add books on your own, just take the user input."
91
- "If a book is mentioned but the author is missing, try to fill the missing author in using reasoning with your knowledge."
92
- "ONLY output a JSON list of dicts, like this:\n"
93
- '[{"title": "...", "author": "..."}, ...]\n'
94
- "Do not add any explanations, prefixes, or markdown. Just the JSON list.\n\n"
95
- f"User input: {user_input}"
96
- )
97
- print("[extract_books_node] Prompt sent to LLM:\n", prompt)
98
-
99
- response = await chat(
100
- model="mistralai/Mistral-7B-Instruct-v0.2",
101
- messages=[{"role":"user","content": prompt}]
102
- )
103
- content = response["message"]["content"]
104
-
105
- print("[extract_books_node] Raw LLM response:\n", repr(content))
106
- print(f"[extract_books_node] Response type: {type(content)}, length: {len(content)}")
107
-
108
- # Remove code blocks or markdown again here just to be sure
109
- cleaned_content = re.sub(r"```(?:json)?\n?|</?(?:pre|code|p)>", "", content, flags=re.IGNORECASE).strip()
110
- print("[extract_books_node] Cleaned response:\n", repr(cleaned_content))
111
-
112
- books = []
113
- try:
114
- books = json.loads(cleaned_content)
115
- print("[extract_books_node] JSON parsed successfully.")
116
- except json.JSONDecodeError as e:
117
- print(f"[extract_books_node] JSONDecodeError: {e}")
118
- print("[extract_books_node] Attempting fallback parsing with ast.literal_eval.")
119
- try:
120
- books = ast.literal_eval(cleaned_content)
121
- print("[extract_books_node] Fallback parsing successful.")
122
- except Exception as e2:
123
- print(f"[extract_books_node] Fallback parsing failed: {e2}")
124
- print("[extract_books_node] Traceback:\n", traceback.format_exc())
125
-
126
- print("[extract_books_node] Extracted books:", books)
127
- print("[extract_books_node] πŸ‘ˆ exit with", {"extracted_books": books})
128
- return {"extracted_books": books}
129
-
130
- except Exception as e:
131
- print("[extract_books_node] ❌ exception:", repr(e))
132
- print("[extract_books_node] Traceback:\n", traceback.format_exc())
133
- raise
134
-
135
- # Node 1.1 New Node: Complete missing authors
136
- async def complete_authors_node(state):
137
- try:
138
- print("[complete_authors_node] πŸ‘‰ enter")
139
- books = state.get("extracted_books", [])
140
- incomplete_books = [book for book in books if not book.get("author", "").strip()]
141
-
142
- if not incomplete_books:
143
- print("[complete_authors_node] No missing authors to complete.")
144
- return {"extracted_books": books}
145
-
146
- # Prepare prompt for LLM
147
- prompt = (
148
- "You are given a list of books with some missing authors. "
149
- "For each book, fill in the correct author using your knowledge. "
150
- "ONLY output a JSON list like this:\n"
151
- '[{"title": "...", "author": "..."}, ...]\n\n'
152
- "Do not add explanations, prefixes, or markdown. Only output the JSON list.\n\n"
153
- f"Books with missing authors:\n{json.dumps(incomplete_books, ensure_ascii=False)}"
154
- )
155
-
156
- print("[complete_authors_node] Prompt sent to LLM:\n", prompt)
157
-
158
- response = await chat(
159
- model="mistralai/Mistral-7B-Instruct-v0.2",
160
- messages=[{"role": "user", "content": prompt}]
161
- )
162
- content = response["message"]["content"]
163
-
164
- print("[complete_authors_node] Raw LLM response:\n", repr(content))
165
- print(f"[complete_authors_node] Response type: {type(content)}, length: {len(content)}")
166
-
167
- # Remove code blocks or markdown
168
- cleaned_content = re.sub(r"```(?:json)?\n?|</?(?:pre|code|p)>", "", content, flags=re.IGNORECASE).strip()
169
- print("[complete_authors_node] Cleaned response:\n", repr(cleaned_content))
170
-
171
- completed_books_from_llm = []
172
- try:
173
- completed_books_from_llm = json.loads(cleaned_content)
174
- print("[complete_authors_node] JSON parsed successfully.")
175
- except json.JSONDecodeError as e:
176
- print(f"[complete_authors_node] JSONDecodeError: {e}")
177
- print("[complete_authors_node] Attempting fallback parsing with ast.literal_eval.")
178
- try:
179
- completed_books_from_llm = ast.literal_eval(cleaned_content)
180
- print("[complete_authors_node] Fallback parsing successful.")
181
- except Exception as e2:
182
- print(f"[complete_authors_node] Fallback parsing failed: {e2}")
183
- print("[complete_authors_node] Traceback:\n", traceback.format_exc())
184
-
185
- # Merge back into the full book list
186
- title_to_author = {book["title"]: book.get("author", "Unknown") for book in completed_books_from_llm}
187
- completed_books = []
188
- for book in books:
189
- title = book.get("title", "").strip()
190
- author = book.get("author", "").strip()
191
- if not author:
192
- # Fill from LLM result or fallback to DuckDuckGo
193
- author = title_to_author.get(title, "").strip()
194
- if not author:
195
- # DuckDuckGo fallback if still missing
196
- query = f"{title} book author"
197
- print(f"[complete_authors_node] Searching DuckDuckGo for author: {query}")
198
- search_results = await duckduckgo_search(query)
199
-
200
- found_author = "Unknown"
201
- if search_results:
202
- for res in search_results:
203
- snippet = res.get("snippet", "")
204
- title_text = res.get("title", "")
205
- match = re.search(r"by ([A-Z][a-z]+(?: [A-Z][a-z]+)*)", snippet + " " + title_text)
206
- if match:
207
- found_author = match.group(1)
208
- print(f"[complete_authors_node] Found author '{found_author}' for book '{title}'")
209
- break
210
- author = found_author
211
-
212
- completed_books.append({
213
- "title": title,
214
- "author": author
215
- })
216
-
217
- print("[complete_authors_node] Completed books list:", completed_books)
218
- return {"extracted_books": completed_books}
219
-
220
- except Exception as e:
221
- print("[complete_authors_node] ❌ exception:", repr(e))
222
- print("[complete_authors_node] Traceback:\n", traceback.format_exc())
223
- raise
224
-
225
- # Node 2
226
- async def recommend_books_node(state):
227
- try:
228
- print("[recommend_books_node] πŸ‘‰ enter")
229
- extracted_books = state.get("extracted_books", [])
230
- reasoning_steps = []
231
- recommended_books = []
232
-
233
- print("[recommend_books_node] Extracted books:", extracted_books)
234
- # await logger.log(f"[recommend_books_node] Extracted books: {extracted_books}")
235
-
236
- if not extracted_books:
237
- reasoning_steps.append("No books extracted from the input. Check if the extraction failed.")
238
- return {"recommendations": [], "reasoning": "\n".join(reasoning_steps)}
239
-
240
- for book in extracted_books:
241
- title = book.get("title", "")
242
- author = book.get("author", "")
243
- query = f"Books similar to '{title}' by {author}"
244
- reasoning_steps.append(f"Searching DuckDuckGo with query: {query}")
245
-
246
- print(f"[recommend_books_node] Searching with query: {query}")
247
- # await logger.log(f"Searching DuckDuckGo with query: {query}")
248
-
249
- search_results = await duckduckgo_search(query)
250
-
251
- if not search_results:
252
- reasoning_steps.append(f"No results found for: {query}")
253
- print(f"[recommend_books_node] No results found for query: {query}")
254
- # await logger.log(f"No results found for query: {query}")
255
- continue
256
-
257
- print(f"[recommend_books_node] Results for query '{query}': {search_results}")
258
-
259
- for res in search_results:
260
- recommended_books.append({
261
- "title": res.get("title", "No Title"),
262
- "link": res.get("link", ""),
263
- "snippet": res.get("snippet", "")
264
- })
265
- reasoning_steps.append(f"βœ… Found: {res.get('title', 'No Title')} ({res.get('link', '')})")
266
-
267
- if not recommended_books:
268
- reasoning_steps.append("No recommendations found across all queries.")
269
-
270
- print("[recommend_books_node] Final recommendations:", recommended_books)
271
- print("[recommend_books_node] πŸ‘ˆ exit with", {"recommendations": recommended_books, "reasoning": "\n".join(reasoning_steps)})
272
- return {
273
- "recommendations": recommended_books,
274
- "reasoning": "\n".join(reasoning_steps)
275
- }
276
-
277
- except Exception as e:
278
- print("[extract_books_node] ❌ exception:", repr(e))
279
- raise
280
-
281
- # Node 3: Reason about the search results and generate recommendations
282
-
283
- async def reasoning_node(state):
284
- try:
285
- recommendations = state.get("recommendations", [])
286
- initial_reasoning = state.get("reasoning", "")
287
-
288
- if not recommendations:
289
- final_reasoning = initial_reasoning + "\nNo recommendations found to reason about."
290
- print("[reasoning_node] No recommendations to process.")
291
- return {"final_recommendations": [], "final_reasoning": final_reasoning}
292
-
293
- # Format recommendations as input for the LLM
294
- recommendations_text = "\n".join(
295
- [f"Title: {rec['title']}\nLink: {rec['link']}\nSnippet: {rec['snippet']}\n" for rec in recommendations]
296
- )
297
-
298
- prompt = (
299
- "You are a helpful book recommendation expert. You are given a web search result. "
300
- "Analyze it and select the most relevant book recommendations. Explain why you recommend each book. "
301
- "Do not recommend the same books from the user input!"
302
- "Output only a JSON list like this:\n"
303
- '[{"title": "...", "reason": "...", "link": "..."}, ...]\n\n'
304
- "Do not add any explanations, comments, or extra text. Only output the JSON list.\n\n"
305
- f"Books found from search:\n{recommendations_text}"
306
- )
307
-
308
- print("[reasoning_node] Prompt sent to LLM:\n", prompt)
309
-
310
- response = await chat(
311
- model="mistralai/Mistral-7B-Instruct-v0.2",
312
- messages=[{"role":"user","content": prompt}]
313
- )
314
-
315
- content = response['message']['content']
316
-
317
- print("[reasoning_node] Raw LLM response:\n", repr(content))
318
- print(f"[reasoning_node] Response type: {type(content)}, length: {len(content)}")
319
-
320
- # Clean the content from code blocks, markdown, etc.
321
- cleaned_content = re.sub(r"```(?:json)?\n?|</?(?:pre|code|p)>", "", content, flags=re.IGNORECASE).strip()
322
- print("[reasoning_node] Cleaned response:\n", repr(cleaned_content))
323
-
324
- final_recommendations = []
325
- try:
326
- final_recommendations = json.loads(cleaned_content)
327
- print("[reasoning_node] JSON parsed successfully.")
328
- except json.JSONDecodeError as e:
329
- print(f"[reasoning_node] JSONDecodeError: {e}")
330
- print("[reasoning_node] Attempting fallback parsing with ast.literal_eval.")
331
- try:
332
- final_recommendations = ast.literal_eval(cleaned_content)
333
- print("[reasoning_node] Fallback parsing successful.")
334
- except Exception as e2:
335
- print(f"[reasoning_node] Fallback parsing failed: {e2}")
336
- print("[reasoning_node] Traceback:\n", traceback.format_exc())
337
-
338
- # Compose final reasoning combining initial and LLM results
339
- final_reasoning = initial_reasoning + "\n\nFinal reasoning:\n"
340
- for rec in final_recommendations:
341
- final_reasoning += f"βœ… Recommended: {rec.get('title', 'Unknown')} - {rec.get('reason', 'No reason provided.')}\n"
342
-
343
- print("[reasoning_node] Final recommendations extracted:", final_recommendations)
344
- print("[reasoning_node] Final reasoning:\n", final_reasoning)
345
-
346
- return {
347
- "final_recommendations": final_recommendations,
348
- "final_reasoning": final_reasoning
349
- }
350
-
351
- except Exception as e:
352
- print("[reasoning_node] ❌ exception:", repr(e))
353
- print("[reasoning_node] Traceback:\n", traceback.format_exc())
354
- raise
355
-
356
-
357
-
358
- # Build the graph
359
- def build_graph():
360
- graph = StateGraph(dict)
361
-
362
- graph.add_node("extract_books", extract_books_node)
363
- graph.add_node("complete_authors", complete_authors_node) # <-- New node
364
- graph.add_node("recommend_books", recommend_books_node)
365
- graph.add_node("reasoning", reasoning_node)
366
-
367
- # Define edges
368
- graph.add_edge("extract_books", "complete_authors") # Modified
369
- graph.add_edge("complete_authors", "recommend_books") # Modified
370
- graph.add_edge("recommend_books", "reasoning")
371
- graph.add_edge("reasoning", END)
372
-
373
- graph.set_entry_point("extract_books")
374
- return graph.compile()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langgraph.graph import StateGraph, END
2
+ from search import duckduckgo_search
3
+ import asyncio
4
+ import re
5
+ import json
6
+ import asyncio
7
+ import httpx
8
+ import os
9
+ import ast
10
+ import traceback
11
+ from huggingface_hub import InferenceClient
12
+
13
+ # Create a single shared client
14
+ # It will read your HUGGINGFACEHUB_API_TOKEN from the env for authentication
15
+ client = InferenceClient(token=os.getenv("HF_API_TOKEN"))
16
+
17
async def hf_chat(model: str, messages: list[dict]):
    """Run the blocking Hugging Face chat-completion call off the event loop.

    Args:
        model: HF model id to query.
        messages: OpenAI-style chat messages (list of {"role", "content"} dicts).

    Returns:
        An Ollama-style dict: {"message": {"role": ..., "content": ...}},
        built from the first completion choice.
    """
    loop = asyncio.get_running_loop()

    def _blocking_request():
        # `client` is the module-level InferenceClient (HF_API_TOKEN auth).
        return client.chat.completions.create(
            model=model,
            messages=messages,
            # generation params could be passed here too
            # temperature=0.7, max_tokens=512, ...
        )

    # The HF client is synchronous; run it in the default executor so the
    # event loop stays responsive.
    completion = await loop.run_in_executor(None, _blocking_request)

    choice = completion.choices[0].message
    return {"message": {"role": choice.role, "content": choice.content}}
37
+
38
+ # Alias `chat` to your HF-backed version
39
+ chat = hf_chat
40
+
41
class AsyncLogger:
    """Append-only in-memory log that is safe to share between asyncio tasks."""

    def __init__(self):
        # Messages in arrival order, guarded by a single asyncio lock.
        self._entries = []
        self._guard = asyncio.Lock()

    async def log(self, message):
        """Record a single message."""
        async with self._guard:
            self._entries.append(message)

    async def get_log(self):
        """Return every recorded message, newline-separated."""
        async with self._guard:
            return "\n".join(self._entries)

    async def clear(self):
        """Drop all recorded messages."""
        async with self._guard:
            self._entries.clear()
57
+
58
+ logger = AsyncLogger()
59
+
60
def extract_json_array(text: str):
    """Extract and parse the first JSON array found in *text*.

    Strips Markdown code fences and simple HTML tags, then tries
    progressively more forgiving strategies: strict JSON, Python literal
    syntax, and finally heuristic repair of common LLM JSON mistakes.

    Returns:
        The parsed list, or [] when no parseable array is found.
    """
    # Remove Markdown/HTML formatting (``` fences, <pre>/<code>/<p> tags).
    text = re.sub(r"```(?:json)?\n?|</?(?:pre|code|p)>", "", text, flags=re.IGNORECASE)

    # Prefer the first [...] block that contains at least one object.
    match = re.search(r"(\[\s*{.*?}\s*\])", text, re.DOTALL)
    if not match:
        # Fall back to any bracketed span (e.g. an empty array "[]").
        # Fix: the original fallback pattern had no capture group, so
        # match.group(1) raised IndexError whenever this branch fired.
        match = re.search(r"(\[.*?\])", text, re.DOTALL)
        if not match:
            return []

    json_str = match.group(1)

    # Strategy 1: strict JSON.
    try:
        return json.loads(json_str)
    except json.JSONDecodeError as e:
        print("[extract_json_array] JSON decode error:", e)

    # Strategy 2: Python literal syntax (handles single-quoted dicts).
    try:
        return ast.literal_eval(json_str)
    except Exception as e2:
        print("[extract_json_array] literal_eval failed:", e2)
        # Strategy 3: repair common LLM mistakes, then re-parse.
        try:
            # Remove trailing commas before closing braces/brackets.
            json_str = re.sub(r',\s*}', '}', json_str)
            json_str = re.sub(r',\s*]', ']', json_str)
            # Quote bare object keys — but only immediately after '{' or ','
            # so colon-bearing VALUES such as "http://..." are left intact
            # (the original r'(\w+):' rewrite corrupted URLs).
            json_str = re.sub(r'([{,]\s*)(\w+)\s*:', r'\1"\2":', json_str)
            return json.loads(json_str)
        except Exception as e3:
            print("[extract_json_array] JSON fixing failed:", e3)
            return []
96
+
97
def safe_json_parse(content: str, fallback_value=None):
    """Parse LLM output into Python data with multiple fallback strategies.

    Order of attempts: strict JSON -> embedded-array extraction via
    extract_json_array -> Python literal -> heuristic repair of common
    LLM JSON mistakes.

    Args:
        content: Raw LLM response text (may include fences/markup).
        fallback_value: Returned when every strategy fails (default []).
    """
    if fallback_value is None:
        fallback_value = []

    # Strip Markdown code fences and simple HTML tags.
    cleaned_content = re.sub(r"```(?:json)?\n?|</?(?:pre|code|p)>", "", content, flags=re.IGNORECASE).strip()

    # 1) Strict JSON.
    try:
        return json.loads(cleaned_content)
    except json.JSONDecodeError as e:
        print(f"[safe_json_parse] JSONDecodeError: {e}")

    # 2) Pull the first JSON array embedded in surrounding prose.
    # NOTE: a legitimately-empty array also reads as falsy here and falls
    # through to the later strategies; they then fail and return the
    # fallback (which defaults to [] anyway).
    extracted = extract_json_array(cleaned_content)
    if extracted:
        return extracted

    # 3) Python literal syntax (single-quoted dicts/lists).
    try:
        return ast.literal_eval(cleaned_content)
    except Exception as e2:
        print(f"[safe_json_parse] literal_eval failed: {e2}")

    # 4) Heuristic repair of common LLM JSON mistakes.
    try:
        # Drop trailing commas before closing braces/brackets.
        fixed_content = re.sub(r',\s*}', '}', cleaned_content)
        fixed_content = re.sub(r',\s*]', ']', fixed_content)
        # Quote bare object keys — only right after '{' or ',' so values
        # containing ':' (e.g. "http://...") are not mangled (the original
        # r'(\w+):' rewrite turned "http://x" into "http"://x).
        fixed_content = re.sub(r'([{,]\s*)(\w+)\s*:', r'\1"\2":', fixed_content)
        # Last resort: swap single quotes for double quotes. This can
        # corrupt apostrophes inside strings, so it runs only after every
        # safer strategy has already failed.
        fixed_content = fixed_content.replace("'", '"')
        return json.loads(fixed_content)
    except Exception as e3:
        print(f"[safe_json_parse] JSON fixing failed: {e3}")
        return fallback_value
135
+
136
def merge_state(current_state: dict, new_data: dict) -> dict:
    """Merge *new_data* into a copy of *current_state* and return it.

    Lists are concatenated, strings are joined with a newline, everything
    else is overwritten. Neither input dict — nor any list it contains —
    is mutated.

    Fix: dict.copy() is shallow, so the original's in-place
    merged_state[key].extend(value) silently appended to the list object
    still referenced by *current_state*; concatenation into a NEW list
    preserves the caller's data.
    """
    merged_state = current_state.copy()
    for key, value in new_data.items():
        if key in merged_state:
            current = merged_state[key]
            if isinstance(current, list) and isinstance(value, list):
                # New list: leaves current_state's list object untouched.
                merged_state[key] = current + value
            elif isinstance(current, str) and isinstance(value, str):
                merged_state[key] = current + "\n" + value
            else:
                # Type mismatch or scalar: new value wins.
                merged_state[key] = value
        else:
            merged_state[key] = value
    return merged_state
153
+
154
# Node 1: Extract books from user input
async def extract_books_node(state):
    """LangGraph node: ask the LLM to extract {title, author} pairs from state["user_input"].

    Returns {"extracted_books": [...]} where every entry is a dict with a
    non-empty "title" and a (possibly empty) "author" string. On LLM/parse
    failure the list is empty; unexpected exceptions are logged and re-raised
    so the graph surfaces them.
    """
    try:
        print("[extract_books_node] 👉 enter")
        user_input = state.get("user_input", "")
        # The prompt forces a bare JSON array so downstream parsing can succeed.
        prompt = (
            "Extract all book titles and authors from the user input. Do not add books on your own, just take the user input."
            "If a book is mentioned but the author is missing, try to fill the missing author in using reasoning with your knowledge."
            "IMPORTANT: Output ONLY a valid JSON array with this exact format:\n"
            '[{"title": "Book Title", "author": "Author Name"}]\n'
            "Rules:\n"
            "- Use double quotes for all strings\n"
            "- No trailing commas\n"
            "- No markdown formatting or code blocks\n"
            "- No explanations or extra text\n"
            "- If no books found, return empty array: []\n\n"
            f"User input: {user_input}"
        )
        print("[extract_books_node] Prompt sent to LLM:\n", prompt)

        response = await chat(
            model="mistralai/Mistral-7B-Instruct-v0.2",
            messages=[{"role":"user","content": prompt}]
        )
        content = response["message"]["content"]

        print("[extract_books_node] Raw LLM response:\n", repr(content))
        print(f"[extract_books_node] Response type: {type(content)}, length: {len(content)}")

        # Use the new safe JSON parsing function
        books = safe_json_parse(content, fallback_value=[])

        # If parsing completely failed, try to extract book titles manually
        # by regex-scanning individual lines for "title"/"author" pairs.
        if not books and content:
            print("[extract_books_node] JSON parsing failed, attempting manual extraction")
            # Look for patterns like "title" or "book" in the content
            lines = content.split('\n')
            manual_books = []
            for line in lines:
                line = line.strip()
                if any(keyword in line.lower() for keyword in ['title', 'book', 'author']):
                    # Try to extract title and author from the line
                    title_match = re.search(r'"title":\s*"([^"]+)"', line)
                    author_match = re.search(r'"author":\s*"([^"]+)"', line)
                    if title_match:
                        title = title_match.group(1)
                        author = author_match.group(1) if author_match else "Unknown"
                        manual_books.append({"title": title, "author": author})

            if manual_books:
                books = manual_books
                print("[extract_books_node] Manual extraction successful:", books)

        print("[extract_books_node] Parsed books:", books)

        # Ensure books is a list and each book has required fields
        if not isinstance(books, list):
            books = []

        # Validate and clean each book entry: coerce fields to stripped
        # strings and drop anything without a title.
        validated_books = []
        for book in books:
            if isinstance(book, dict):
                validated_book = {
                    "title": str(book.get("title", "")).strip(),
                    "author": str(book.get("author", "")).strip()
                }
                if validated_book["title"]:  # Only add if title is not empty
                    validated_books.append(validated_book)

        print("[extract_books_node] Validated books:", validated_books)
        print("[extract_books_node] 👈 exit with", {"extracted_books": validated_books})
        return {"extracted_books": validated_books}

    except Exception as e:
        print("[extract_books_node] ❌ exception:", repr(e))
        print("[extract_books_node] Traceback:\n", traceback.format_exc())
        raise
232
+
233
# Node 1.1 New Node: Complete missing authors
async def complete_authors_node(state):
    """LangGraph node: fill in missing authors on state["extracted_books"].

    Tries the LLM first; for any title still missing an author afterwards,
    falls back to a DuckDuckGo search and a "by <Name>" regex over the
    results, defaulting to "Unknown". Returns {"extracted_books": [...]}
    with validated {"title", "author"} dicts.
    """
    try:
        print("[complete_authors_node] 👉 enter")
        books = state.get("extracted_books", [])
        incomplete_books = [book for book in books if not book.get("author", "").strip()]

        if not incomplete_books:
            print("[complete_authors_node] No missing authors to complete.")
            return {"extracted_books": books}

        # Prepare prompt for LLM
        prompt = (
            "You are given a list of books with some missing authors. "
            "For each book, fill in the correct author using your knowledge. "
            "IMPORTANT: Output ONLY a valid JSON array with this exact format:\n"
            '[{"title": "Book Title", "author": "Author Name"}]\n'
            "Rules:\n"
            "- Use double quotes for all strings\n"
            "- No trailing commas\n"
            "- No markdown formatting or code blocks\n"
            "- No explanations or extra text\n"
            "- Return all books, not just the ones with missing authors\n\n"
            f"Books with missing authors:\n{json.dumps(incomplete_books, ensure_ascii=False)}"
        )

        print("[complete_authors_node] Prompt sent to LLM:\n", prompt)

        response = await chat(
            model="mistralai/Mistral-7B-Instruct-v0.2",
            messages=[{"role": "user", "content": prompt}]
        )
        content = response["message"]["content"]

        print("[complete_authors_node] Raw LLM response:\n", repr(content))
        print(f"[complete_authors_node] Response type: {type(content)}, length: {len(content)}")

        # Use the new safe JSON parsing function
        completed_books_from_llm = safe_json_parse(content, fallback_value=[])
        print("[complete_authors_node] Parsed completed books:", completed_books_from_llm)

        # Merge back into the full book list, keyed by exact title match.
        title_to_author = {book["title"]: book.get("author", "Unknown") for book in completed_books_from_llm}
        completed_books = []
        for book in books:
            title = book.get("title", "").strip()
            author = book.get("author", "").strip()
            if not author:
                # Fill from LLM result or fallback to DuckDuckGo
                author = title_to_author.get(title, "").strip()
                if not author:
                    # DuckDuckGo fallback if still missing
                    query = f"{title} book author"
                    print(f"[complete_authors_node] Searching DuckDuckGo for author: {query}")
                    search_results = await duckduckgo_search(query)

                    found_author = "Unknown"
                    if search_results:
                        for res in search_results:
                            snippet = res.get("snippet", "")
                            title_text = res.get("title", "")
                            # NOTE(review): only matches capitalized "by First Last"
                            # names — misses lowercase particles ("de", "van") and
                            # initials; confirm this is acceptable.
                            match = re.search(r"by ([A-Z][a-z]+(?: [A-Z][a-z]+)*)", snippet + " " + title_text)
                            if match:
                                found_author = match.group(1)
                                print(f"[complete_authors_node] Found author '{found_author}' for book '{title}'")
                                break
                    author = found_author

            completed_books.append({
                "title": title,
                "author": author
            })

        # Validate the completed books: coerce to stripped strings and drop
        # any entry without a title.
        validated_books = []
        for book in completed_books:
            if isinstance(book, dict):
                validated_book = {
                    "title": str(book.get("title", "")).strip(),
                    "author": str(book.get("author", "")).strip()
                }
                if validated_book["title"]:  # Only add if title is not empty
                    validated_books.append(validated_book)

        print("[complete_authors_node] Validated completed books:", validated_books)
        return {"extracted_books": validated_books}

    except Exception as e:
        print("[complete_authors_node] ❌ exception:", repr(e))
        print("[complete_authors_node] Traceback:\n", traceback.format_exc())
        raise
324
+
325
# Node 2
async def recommend_books_node(state):
    """LangGraph node: search DuckDuckGo for books similar to each extracted book.

    Reads state["extracted_books"]; returns {"recommendations": [...],
    "reasoning": "..."} where each recommendation carries title/link/snippet
    and the reasoning string records every query and hit. Returns empty
    recommendations when no books were extracted.
    """
    try:
        print("[recommend_books_node] 👉 enter")
        extracted_books = state.get("extracted_books", [])
        reasoning_steps = []
        recommended_books = []

        print("[recommend_books_node] Extracted books:", extracted_books)

        if not extracted_books:
            reasoning_steps.append("No books extracted from the input. Check if the extraction failed.")
            return {"recommendations": [], "reasoning": "\n".join(reasoning_steps)}

        for book in extracted_books:
            title = book.get("title", "")
            author = book.get("author", "")
            query = f"Books similar to '{title}' by {author}"
            reasoning_steps.append(f"Searching DuckDuckGo with query: {query}")

            print(f"[recommend_books_node] Searching with query: {query}")

            search_results = await duckduckgo_search(query)

            if not search_results:
                reasoning_steps.append(f"No results found for: {query}")
                print(f"[recommend_books_node] No results found for query: {query}")
                continue

            print(f"[recommend_books_node] Results for query '{query}': {search_results}")

            for res in search_results:
                recommended_books.append({
                    "title": res.get("title", "No Title"),
                    "link": res.get("link", ""),
                    "snippet": res.get("snippet", "")
                })
                reasoning_steps.append(f"✅ Found: {res.get('title', 'No Title')} ({res.get('link', '')})")

        if not recommended_books:
            reasoning_steps.append("No recommendations found across all queries.")

        print("[recommend_books_node] Final recommendations:", recommended_books)
        print("[recommend_books_node] 👈 exit with", {"recommendations": recommended_books, "reasoning": "\n".join(reasoning_steps)})
        return {
            "recommendations": recommended_books,
            "reasoning": "\n".join(reasoning_steps)
        }

    except Exception as e:
        # Fix: the original logged this under "[extract_books_node]" (copy-paste
        # slip), misattributing failures; also print the traceback for parity
        # with the other nodes.
        print("[recommend_books_node] ❌ exception:", repr(e))
        print("[recommend_books_node] Traceback:\n", traceback.format_exc())
        raise
380
+
381
# Node 3: Reason about the search results and generate recommendations

async def reasoning_node(state):
    """LangGraph node: have the LLM curate the raw search hits into final picks.

    Reads state["recommendations"] and state["reasoning"]; returns
    {"final_recommendations": [...], "final_reasoning": "..."} where each
    final recommendation is a validated {"title", "reason", "link"} dict.
    """
    try:
        recommendations = state.get("recommendations", [])
        initial_reasoning = state.get("reasoning", "")

        if not recommendations:
            final_reasoning = initial_reasoning + "\nNo recommendations found to reason about."
            print("[reasoning_node] No recommendations to process.")
            return {"final_recommendations": [], "final_reasoning": final_reasoning}

        # Format recommendations as input for the LLM
        recommendations_text = "\n".join(
            [f"Title: {rec['title']}\nLink: {rec['link']}\nSnippet: {rec['snippet']}\n" for rec in recommendations]
        )

        prompt = (
            "You are a helpful book recommendation expert. You are given a web search result. "
            "Analyze it and select the most relevant book recommendations. Explain why you recommend each book. "
            "Do not recommend the same books from the user input!"
            "IMPORTANT: Output ONLY a valid JSON array with this exact format:\n"
            '[{"title": "Book Title", "reason": "Why this book is recommended", "link": "URL"}]\n'
            "Rules:\n"
            "- Use double quotes for all strings\n"
            "- No trailing commas\n"
            "- No markdown formatting or code blocks\n"
            "- No explanations or extra text\n"
            "- If no good recommendations, return empty array: []\n\n"
            f"Books found from search:\n{recommendations_text}"
        )

        print("[reasoning_node] Prompt sent to LLM:\n", prompt)

        response = await chat(
            model="mistralai/Mistral-7B-Instruct-v0.2",
            messages=[{"role":"user","content": prompt}]
        )

        content = response['message']['content']

        print("[reasoning_node] Raw LLM response:\n", repr(content))
        print(f"[reasoning_node] Response type: {type(content)}, length: {len(content)}")

        # Use the new safe JSON parsing function
        final_recommendations = safe_json_parse(content, fallback_value=[])
        print("[reasoning_node] Parsed final recommendations:", final_recommendations)

        # Fix: validate BEFORE composing the reasoning text. The original
        # iterated the raw parse result first, so a non-dict entry (e.g. a
        # bare string in the parsed list) raised AttributeError on rec.get.
        validated_recommendations = []
        if isinstance(final_recommendations, list):
            for rec in final_recommendations:
                if isinstance(rec, dict):
                    validated_rec = {
                        "title": str(rec.get("title", "")).strip(),
                        "reason": str(rec.get("reason", "")).strip(),
                        "link": str(rec.get("link", "")).strip()
                    }
                    if validated_rec["title"]:  # Only add if title is not empty
                        validated_recommendations.append(validated_rec)

        # Compose final reasoning from the validated entries only.
        final_reasoning = initial_reasoning + "\n\nFinal reasoning:\n"
        for rec in validated_recommendations:
            final_reasoning += f"✅ Recommended: {rec.get('title', 'Unknown')} - {rec.get('reason', 'No reason provided.')}\n"

        print("[reasoning_node] Validated final recommendations:", validated_recommendations)
        print("[reasoning_node] Final reasoning:\n", final_reasoning)

        return {
            "final_recommendations": validated_recommendations,
            "final_reasoning": final_reasoning
        }

    except Exception as e:
        print("[reasoning_node] ❌ exception:", repr(e))
        print("[reasoning_node] Traceback:\n", traceback.format_exc())
        raise
459
+
460
+
461
+
462
# Build the graph
def build_graph():
    """Assemble and compile the linear book-recommendation pipeline.

    Flow: extract_books -> complete_authors -> recommend_books ->
    reasoning -> END.
    """
    workflow = StateGraph(dict)

    # Register every pipeline node.
    for node_name, node_fn in (
        ("extract_books", extract_books_node),
        ("complete_authors", complete_authors_node),
        ("recommend_books", recommend_books_node),
        ("reasoning", reasoning_node),
    ):
        workflow.add_node(node_name, node_fn)

    # Wire the nodes into a straight line terminating at END.
    workflow.add_edge("extract_books", "complete_authors")
    workflow.add_edge("complete_authors", "recommend_books")
    workflow.add_edge("recommend_books", "reasoning")
    workflow.add_edge("reasoning", END)

    workflow.set_entry_point("extract_books")
    return workflow.compile()
app.py CHANGED
@@ -10,10 +10,16 @@ async def run_book_recommender(user_input):
10
  final_state = None
11
 
12
  try:
 
13
  async for state in graph.astream(initial_state):
 
 
14
  final_state = state
 
15
  except Exception as e:
16
  print("πŸ”₯ Exception while streaming graph:", e)
 
 
17
  raise
18
 
19
  if final_state is None:
@@ -21,30 +27,40 @@ async def run_book_recommender(user_input):
21
  "final_recommendations": [],
22
  "final_reasoning": "⚠️ Graph never yielded a final state."
23
  }
 
 
 
 
 
 
24
 
25
- reasoning_data = final_state.get("reasoning", {})
26
- recs = reasoning_data.get("final_recommendations", [])
27
- reasoning = reasoning_data.get("final_reasoning", "")
28
 
29
  # Defensive formatting of recommendations
30
  try:
31
- if isinstance(recs, list) and all(isinstance(r, dict) for r in recs):
 
 
 
 
 
 
 
 
 
 
32
  # Format nicely as before
33
  recs_text = "\n\n".join(
34
  f"πŸ“˜ {r.get('title', 'Unknown Title')}\nπŸ”— {r.get('link','')}\nπŸ’‘ {r.get('reason','')}"
35
- for r in recs
36
  )
37
- if not recs_text.strip():
38
- recs_text = "No recommendations found."
39
  else:
40
- # For any other structure, try pretty-printing JSON or just string conversion
41
- try:
42
- recs_text = json.dumps(recs, indent=2, ensure_ascii=False)
43
- except Exception:
44
- recs_text = pformat(recs)
45
- if not recs_text.strip():
46
- recs_text = "No recommendations found."
47
  except Exception as e:
 
48
  recs_text = f"Error formatting recommendations: {e}"
49
 
50
  return recs_text, reasoning
 
10
  final_state = None
11
 
12
  try:
13
+ step_count = 0
14
  async for state in graph.astream(initial_state):
15
+ step_count += 1
16
+ print(f"πŸ” Step {step_count}: State keys = {list(state.keys())}")
17
  final_state = state
18
+ print(f"βœ… Graph completed in {step_count} steps")
19
  except Exception as e:
20
  print("πŸ”₯ Exception while streaming graph:", e)
21
+ import traceback
22
+ print("πŸ”₯ Traceback:", traceback.format_exc())
23
  raise
24
 
25
  if final_state is None:
 
27
  "final_recommendations": [],
28
  "final_reasoning": "⚠️ Graph never yielded a final state."
29
  }
30
+
31
+ # Ensure we have the expected keys in final_state
32
+ if "final_recommendations" not in final_state:
33
+ final_state["final_recommendations"] = []
34
+ if "final_reasoning" not in final_state:
35
+ final_state["final_reasoning"] = "⚠️ Missing reasoning data from graph execution."
36
 
37
+ # Access the final state directly - the reasoning node returns these keys directly
38
+ recs = final_state.get("final_recommendations", [])
39
+ reasoning = final_state.get("final_reasoning", "")
40
 
41
  # Defensive formatting of recommendations
42
  try:
43
+ # Ensure recs is a list
44
+ if not isinstance(recs, list):
45
+ recs = []
46
+
47
+ # Filter out invalid entries
48
+ valid_recs = []
49
+ for r in recs:
50
+ if isinstance(r, dict) and r.get('title'):
51
+ valid_recs.append(r)
52
+
53
+ if valid_recs:
54
  # Format nicely as before
55
  recs_text = "\n\n".join(
56
  f"πŸ“˜ {r.get('title', 'Unknown Title')}\nπŸ”— {r.get('link','')}\nπŸ’‘ {r.get('reason','')}"
57
+ for r in valid_recs
58
  )
 
 
59
  else:
60
+ recs_text = "No recommendations found."
61
+
 
 
 
 
 
62
  except Exception as e:
63
+ print(f"Error formatting recommendations: {e}")
64
  recs_text = f"Error formatting recommendations: {e}"
65
 
66
  return recs_text, reasoning