Guiyom committed on
Commit
8fdb5c7
·
verified ·
1 Parent(s): db9a2da

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +105 -62
app.py CHANGED
@@ -162,52 +162,69 @@ def fine_tune_report(adjustment_request: str, openai_api_key: str, serpapi_api_k
162
  initial_request: str, qa: str, target_style: str, knowledge_crumbs: str,
163
  complementary_guidance: str) -> (str, str):
164
  """
165
- Fine-tunes an HTML report based on a user’s correction request.
166
 
167
- Steps:
168
- 1. Identify relevant snippet(s) from the report that need adjustment by calling the LLM.
169
- 2. Using BeautifulSoup, find those snippet(s) in report_html.
170
- 3. For each snippet, call the LLM to generate a corrected version given the user request,
171
- keeping in mind the full report context and search crumbs.
172
- 4. Replace the old snippet in the report with the corrected one.
173
- 5. Call the LLM to review the updated report and generate an updated reference table (if new references exist).
174
- 6. Return the updated report and append a summary of applied corrections to the QA log.
 
 
 
 
175
 
176
  Parameters:
177
- adjustment_request: The user request for corrections (e.g. "fix the visual after 'xyz'").
 
178
  openai_api_key: OpenAI API Key.
179
  serpapi_api_key: SERPAPI API Key.
180
- report_html: The full HTML of the current report.
181
- initial_request: The original research query/original request.
182
- qa: Existing clarification Q&A.
183
- target_style: The target style for the report.
184
- knowledge_crumbs: Aggregated source/crumb content.
185
- complementary_guidance: Any additional guidance.
186
-
187
  Returns:
188
- A tuple (updated_report_html, updated_qa)
189
  """
190
  import os
191
  import json
192
  import logging
193
  from bs4 import BeautifulSoup
194
 
195
- # Set API keys in environment variables
196
  os.environ["OPENAI_API_KEY"] = openai_api_key
197
  os.environ["SERPAPI_API_KEY"] = serpapi_api_key
198
 
199
  logging.info("fine_tune_report: Starting fine-tuning process based on the adjustment request.")
200
 
201
- # Step 1: Identify the snippet(s) in the report relevant to the adjustment.
202
- prompt_identify = (f"You are a meticulous technical editor. Below is the full report HTML and a user adjustment request. "
203
- f"Based on the user instruction, extract and output the minimal, unique HTML snippet(s) (including their container tags) "
204
- f"from the report that need fixing. Output your answer as a JSON object with a key \"identified_snippets\" mapping to a list of HTML snippets only (no commentary).\n\n"
205
- f"Full Report HTML:\n{report_html}\n\n"
206
- f"User Adjustment Request:\n{adjustment_request}\n\n"
207
- f"Only output valid JSON.")
 
 
 
 
 
 
 
 
 
 
 
208
 
209
  response_identify = openai_call(prompt=prompt_identify, model="o3-mini", max_tokens_param=1500, temperature=0)
210
  logging.info(f"fine_tune_report: Raw snippet identification response: {response_identify}")
 
211
  try:
212
  response_identify = response_identify.strip().strip("```")
213
  id_data = json.loads(response_identify)
@@ -216,66 +233,90 @@ def fine_tune_report(adjustment_request: str, openai_api_key: str, serpapi_api_k
216
  logging.error(f"fine_tune_report: Error parsing identified snippets JSON: {e}")
217
  identified_snippets = []
218
 
219
- # If no snippets were identified, log an error and fall back (optional: you may choose to return without changes).
220
  if not identified_snippets:
221
- logging.warning("fine_tune_report: No specific snippets were identified for adjustment. Returning original report.")
222
  return report_html, qa
223
 
224
- # Step 2: For each identified snippet, extract it from the report and prepare to correct it.
 
 
225
  soup = BeautifulSoup(report_html, "html.parser")
226
  updated_report_html = report_html
227
  corrections_summary = []
228
 
229
  for snippet in identified_snippets:
230
  snippet = snippet.strip()
231
- # Check if the snippet text appears in the report
232
- if snippet not in updated_report_html:
233
- logging.warning(f"fine_tune_report: The following snippet was not found exactly in the report and will be skipped:\n{snippet}")
 
234
  continue
235
 
236
- # Step 3: For each snippet, prompt the LLM to apply the user-specified correction.
237
- prompt_adjust = (f"You are a technical editor. Given the following HTML snippet extracted from a larger report and the user request, "
238
- f"make only the changes necessary to address the instruction. Preserve all existing citations, formatting, and context. "
239
- f"Ensure that the overall style of the report remains consistent with the provided target style and that any new references (if any) "
240
- f"are clearly indicated. Output your answer as a JSON object with two keys: \"improved\" (the corrected HTML snippet) and \"summary\" "
241
- f"(a brief summary of the changes applied).\n\n"
242
- f"Overall Report HTML:\n{report_html}\n\n"
243
- f"Current Snippet to Adjust:\n{snippet}\n\n"
244
- f"User Adjustment Request:\n{adjustment_request}\n\n"
245
- f"Additional Guidance:\nTarget Style: {target_style}\nKnowledge Crumbs: {knowledge_crumbs}\nComplementary Guidance: {complementary_guidance}\n\n"
246
- f"Only output valid JSON.")
 
 
 
 
 
 
 
 
 
 
 
 
247
  response_adjust = openai_call(prompt=prompt_adjust, model="o3-mini", max_tokens_param=2000, temperature=0.0)
248
  logging.info(f"fine_tune_report: Raw adjustment response: {response_adjust}")
 
249
  try:
250
  response_adjust = response_adjust.strip().strip("```")
251
  adjust_data = json.loads(response_adjust)
252
- corrected_snippet = adjust_data.get("improved", "").strip()
253
  snippet_summary = adjust_data.get("summary", "").strip()
254
  except Exception as e:
255
  logging.error(f"fine_tune_report: Error parsing snippet adjustment JSON: {e}")
256
  continue
257
 
258
- if not corrected_snippet:
259
- logging.warning("fine_tune_report: No improved snippet was returned by the LLM; skipping this snippet.")
260
  continue
261
 
262
- corrections_summary.append(f"Changes applied to snippet: {snippet_summary}")
263
- # Step 4: Replace the original snippet with the improved snippet in the report HTML.
264
- updated_report_html = updated_report_html.replace(snippet, corrected_snippet, 1)
265
- logging.info("fine_tune_report: Snippet replaced in the report.")
266
-
267
- # Step 5: Update the reference table. Ask the LLM to review the updated report and generate an updated reference table if needed.
268
- prompt_refs = (f"You are a technical editor. Review the following updated report HTML. "
269
- f"If there are any new inline citations (formatted as [x]) that are not in the existing reference table, "
270
- f"generate an updated Reference Summary Table in valid HTML that includes all references. "
271
- f"Output only the HTML code for the updated reference table without any extra commentary.\n\n"
272
- f"Updated Report HTML:\n{updated_report_html}")
 
 
 
 
 
 
 
 
 
273
  updated_refs = openai_call(prompt=prompt_refs, model="o3-mini", max_tokens_param=1000, temperature=0.5)
274
  updated_refs = updated_refs.strip().strip("```")
275
 
276
  if updated_refs:
277
  soup_updated = BeautifulSoup(updated_report_html, "html.parser")
278
- # Look for a heading that includes "Reference Summary Table"
279
  ref_heading = soup_updated.find(lambda tag: tag.name in ["h1", "h2", "h3", "h4"] and "Reference Summary Table" in tag.get_text())
280
  if ref_heading:
281
  next_sibling = ref_heading.find_next_sibling()
@@ -287,14 +328,16 @@ def fine_tune_report(adjustment_request: str, openai_api_key: str, serpapi_api_k
287
  except Exception as e:
288
  logging.error(f"fine_tune_report: Error replacing the reference table: {e}")
289
  else:
290
- logging.info("fine_tune_report: No sibling element found after the reference heading; skipping reference table update.")
291
  updated_report_html = str(soup_updated)
292
  else:
293
- logging.info("fine_tune_report: No existing reference table heading found; reference update skipped.")
294
  else:
295
- logging.info("fine_tune_report: LLM did not return an updated reference table; leaving original references intact.")
296
 
297
- # Step 6: Append corrections summary to the Q&A log.
 
 
298
  global_summary = "Corrections Applied Based on User Request:\n" + "\n".join(corrections_summary)
299
  updated_qa = qa.strip() + "\n----------\n" + global_summary
300
 
 
162
  initial_request: str, qa: str, target_style: str, knowledge_crumbs: str,
163
  complementary_guidance: str) -> (str, str):
164
  """
165
+ Fine-tunes an HTML report based on a user’s correction request by processing complete container elements.
166
 
167
+ Process Overview:
168
+ 1. The function submits the full report HTML along with the user’s adjustment request to the LLM.
169
+ The prompt instructs the model to output a JSON object containing the minimal unique container(s)
170
+ (including their outer HTML—e.g. <iframe>, <div>, or <table>) that correspond to the content that needs
171
+ fixing.
172
+ 2. The identified container snippet(s) are then located in the report using BeautifulSoup.
173
+ 3. For each container, a second LLM call is made to generate a corrected version that integrates the user
174
+ instructions while maintaining context, citations, and overall style.
175
+ 4. The old container markup is replaced by the corrected version directly in the BeautifulSoup object.
176
+ 5. Finally, if new inline citations have been introduced (beyond those in the reference table), a final LLM
177
+ call updates the reference table.
178
+ 6. A summary of the corrections is appended to the QA log.
179
 
180
  Parameters:
181
+ adjustment_request: A string such as "the visual after 'xyz' is not displaying properly, please fix it" or
182
+ "the introduction should be more detailed, adjust it" etc.
183
  openai_api_key: OpenAI API Key.
184
  serpapi_api_key: SERPAPI API Key.
185
+ report_html: A string containing the full HTML report.
186
+ initial_request: The original research query or request.
187
+ qa: Existing clarification Q&A log.
188
+ target_style: The stylistic guidelines the report should follow.
189
+ knowledge_crumbs: Aggregated source/search result content.
190
+ complementary_guidance: Additional instructions.
191
+
192
  Returns:
193
+ A tuple (updated_report_html, updated_qa) with the corrected report and updated QA log.
194
  """
195
  import os
196
  import json
197
  import logging
198
  from bs4 import BeautifulSoup
199
 
200
+ # Set API keys as environment variables for downstream calls.
201
  os.environ["OPENAI_API_KEY"] = openai_api_key
202
  os.environ["SERPAPI_API_KEY"] = serpapi_api_key
203
 
204
  logging.info("fine_tune_report: Starting fine-tuning process based on the adjustment request.")
205
 
206
+ # ---------------------------------------------------------------
207
+ # Step 1: Identify container snippet(s) needing adjustment.
208
+ #
209
+ # The prompt instructs the LLM to scan the full report and output a JSON object
210
+ # with a key "identified_snippets" that contains complete HTML container elements
211
+ # (including their outer tags) that uniquely correspond to the section(s) which
212
+ # should be adjusted per the user request.
213
+ # ---------------------------------------------------------------
214
+ prompt_identify = (
215
+ f"You are a meticulous technical editor. Below is the full report HTML together with a "
216
+ f"user adjustment request. Identify the minimal, unique container(s) that capture the key content "
217
+ f"relevant to the user instruction. The containers may be complete HTML elements such as a <div>, <iframe>, "
218
+ f"<table>, etc. Output a JSON object with the key \"identified_snippets\" that maps to a list of these container "
219
+ f"HTML snippets ONLY (include the outer tags). No commentary or additional text should be present.\n\n"
220
+ f"Full Report HTML:\n{report_html}\n\n"
221
+ f"User Adjustment Request:\n{adjustment_request}\n\n"
222
+ f"Only output valid JSON."
223
+ )
224
 
225
  response_identify = openai_call(prompt=prompt_identify, model="o3-mini", max_tokens_param=1500, temperature=0)
226
  logging.info(f"fine_tune_report: Raw snippet identification response: {response_identify}")
227
+
228
  try:
229
  response_identify = response_identify.strip().strip("```")
230
  id_data = json.loads(response_identify)
 
233
  logging.error(f"fine_tune_report: Error parsing identified snippets JSON: {e}")
234
  identified_snippets = []
235
 
 
236
  if not identified_snippets:
237
+ logging.warning("fine_tune_report: No specific container snippets were identified for adjustment. Returning original report.")
238
  return report_html, qa
239
 
240
+ # ---------------------------------------------------------------
241
+ # Step 2: For each identified container snippet, find it in the report.
242
+ # ---------------------------------------------------------------
243
  soup = BeautifulSoup(report_html, "html.parser")
244
  updated_report_html = report_html
245
  corrections_summary = []
246
 
247
  for snippet in identified_snippets:
248
  snippet = snippet.strip()
249
+ # Use BeautifulSoup to search for a tag whose complete outer HTML contains the snippet.
250
+ candidate = soup.find(lambda tag: snippet in str(tag))
251
+ if not candidate:
252
+ logging.warning(f"fine_tune_report: The snippet could not be uniquely located in the report:\n{snippet}")
253
  continue
254
 
255
+ original_container_html = str(candidate)
256
+ logging.info("fine_tune_report: Found container snippet for adjustment.")
257
+
258
+ # ---------------------------------------------------------------
259
+ # Step 3: Send a prompt to the LLM to correct this container.
260
+ #
261
+ # Here the LLM is given the entire current container (the extracted full HTML)
262
+ # and the full report context (and other guidance) and is asked to produce a corrected
263
+ # version that applies the adjustment request.
264
+ # ---------------------------------------------------------------
265
+ prompt_adjust = (
266
+ f"You are a technical editor. Given the following HTML container (with its outer tags) "
267
+ f"extracted from a larger report and based on the user adjustment request, produce a corrected "
268
+ f"version by making only the changes required. Preserve existing inline citations, formatting, and contextual details. "
269
+ f"Ensure the updated content remains consistent with the overall report style. Output your answer as a JSON object "
270
+ f"with exactly two keys: \"improved\" (the corrected container's full HTML) and \"summary\" (a brief explanation of the changes applied).\n\n"
271
+ f"Overall Report HTML:\n{report_html}\n\n"
272
+ f"Original Container to Adjust:\n{original_container_html}\n\n"
273
+ f"User Adjustment Request:\n{adjustment_request}\n\n"
274
+ f"Additional Guidance:\nTarget Style: {target_style}\nKnowledge Crumbs: {knowledge_crumbs}\nComplementary Guidance: {complementary_guidance}\n\n"
275
+ f"Only output valid JSON."
276
+ )
277
+
278
  response_adjust = openai_call(prompt=prompt_adjust, model="o3-mini", max_tokens_param=2000, temperature=0.0)
279
  logging.info(f"fine_tune_report: Raw adjustment response: {response_adjust}")
280
+
281
  try:
282
  response_adjust = response_adjust.strip().strip("```")
283
  adjust_data = json.loads(response_adjust)
284
+ corrected_container = adjust_data.get("improved", "").strip()
285
  snippet_summary = adjust_data.get("summary", "").strip()
286
  except Exception as e:
287
  logging.error(f"fine_tune_report: Error parsing snippet adjustment JSON: {e}")
288
  continue
289
 
290
+ if not corrected_container:
291
+ logging.warning("fine_tune_report: No improved container was returned by the LLM; skipping this snippet.")
292
  continue
293
 
294
+ corrections_summary.append(f"Container corrected: {snippet_summary}")
295
+
296
+ # ---------------------------------------------------------------
297
+ # Step 4: Replace the original container in the BeautifulSoup object.
298
+ # ---------------------------------------------------------------
299
+ candidate.replace_with(BeautifulSoup(corrected_container, "html.parser"))
300
+ logging.info("fine_tune_report: Container snippet replaced.")
301
+
302
+ # Get the updated report HTML from the modified soup.
303
+ updated_report_html = str(soup)
304
+
305
+ # ---------------------------------------------------------------
306
+ # Step 5: Update the reference table if any new inline citations exist.
307
+ # ---------------------------------------------------------------
308
+ prompt_refs = (
309
+ f"You are a technical editor. Review the following updated report HTML. "
310
+ f"If there are any new inline citations (e.g., [x]) not present in the original reference table, "
311
+ f"generate an updated Reference Summary Table as valid HTML containing all references. Output only the HTML code for the updated reference table with no commentary.\n\n"
312
+ f"Updated Report HTML:\n{updated_report_html}"
313
+ )
314
  updated_refs = openai_call(prompt=prompt_refs, model="o3-mini", max_tokens_param=1000, temperature=0.5)
315
  updated_refs = updated_refs.strip().strip("```")
316
 
317
  if updated_refs:
318
  soup_updated = BeautifulSoup(updated_report_html, "html.parser")
319
+ # Look for a heading that includes something like "Reference Summary Table"
320
  ref_heading = soup_updated.find(lambda tag: tag.name in ["h1", "h2", "h3", "h4"] and "Reference Summary Table" in tag.get_text())
321
  if ref_heading:
322
  next_sibling = ref_heading.find_next_sibling()
 
328
  except Exception as e:
329
  logging.error(f"fine_tune_report: Error replacing the reference table: {e}")
330
  else:
331
+ logging.info("fine_tune_report: No sibling element found after reference heading; skipping reference update.")
332
  updated_report_html = str(soup_updated)
333
  else:
334
+ logging.info("fine_tune_report: No reference table heading found; reference update skipped.")
335
  else:
336
+ logging.info("fine_tune_report: No updated reference table returned; leaving references unchanged.")
337
 
338
+ # ---------------------------------------------------------------
339
+ # Step 6: Append a summary of corrections to the existing QA log.
340
+ # ---------------------------------------------------------------
341
  global_summary = "Corrections Applied Based on User Request:\n" + "\n".join(corrections_summary)
342
  updated_qa = qa.strip() + "\n----------\n" + global_summary
343