Spaces:

10gen
/

deepsearchitv2

Runtime error

App Files Files Community

Guiyom committed on Feb 26, 2025

Commit

8fdb5c7

verified ·

1 Parent(s): db9a2da

Update app.py

Browse files

Files changed (1) hide show

app.py +105 -62

app.py CHANGED Viewed

@@ -162,52 +162,69 @@ def fine_tune_report(adjustment_request: str, openai_api_key: str, serpapi_api_k
                      initial_request: str, qa: str, target_style: str, knowledge_crumbs: str,
                      complementary_guidance: str) -> (str, str):
     """
-    Fine-tunes an HTML report based on a user’s correction request.
-    Steps:
-      1. Identify relevant snippet(s) from the report that need adjustment by calling the LLM.
-      2. Using BeautifulSoup, find those snippet(s) in report_html.
-      3. For each snippet, call the LLM to generate a corrected version given the user request,
-         keeping in mind the full report context and search crumbs.
-      4. Replace the old snippet in the report with the corrected one.
-      5. Call the LLM to review the updated report and generate an updated reference table (if new references exist).
-      6. Return the updated report and append a summary of applied corrections to the QA log.
     Parameters:
-      adjustment_request: The user request for corrections (e.g. "fix the visual after 'xyz'").
       openai_api_key: OpenAI API Key.
       serpapi_api_key: SERPAPI API Key.
-      report_html: The full HTML of the current report.
-      initial_request: The original research query/original request.
-      qa: Existing clarification Q&A.
-      target_style: The target style for the report.
-      knowledge_crumbs: Aggregated source/crumb content.
-      complementary_guidance: Any additional guidance.
     Returns:
-      A tuple (updated_report_html, updated_qa)
     """
     import os
     import json
     import logging
     from bs4 import BeautifulSoup
-    # Set API keys in environment variables
     os.environ["OPENAI_API_KEY"] = openai_api_key
     os.environ["SERPAPI_API_KEY"] = serpapi_api_key
     logging.info("fine_tune_report: Starting fine-tuning process based on the adjustment request.")
-    # Step 1: Identify the snippet(s) in the report relevant to the adjustment.
-    prompt_identify = (f"You are a meticulous technical editor. Below is the full report HTML and a user adjustment request. "
-                         f"Based on the user instruction, extract and output the minimal, unique HTML snippet(s) (including their container tags) "
-                         f"from the report that need fixing. Output your answer as a JSON object with a key \"identified_snippets\" mapping to a list of HTML snippets only (no commentary).\n\n"
-                         f"Full Report HTML:\n{report_html}\n\n"
-                         f"User Adjustment Request:\n{adjustment_request}\n\n"
-                         f"Only output valid JSON.")
     response_identify = openai_call(prompt=prompt_identify, model="o3-mini", max_tokens_param=1500, temperature=0)
     logging.info(f"fine_tune_report: Raw snippet identification response: {response_identify}")
     try:
         response_identify = response_identify.strip().strip("```")
         id_data = json.loads(response_identify)
@@ -216,66 +233,90 @@ def fine_tune_report(adjustment_request: str, openai_api_key: str, serpapi_api_k
         logging.error(f"fine_tune_report: Error parsing identified snippets JSON: {e}")
         identified_snippets = []
-    # If no snippets were identified, log an error and fall back (optional: you may choose to return without changes).
     if not identified_snippets:
-        logging.warning("fine_tune_report: No specific snippets were identified for adjustment. Returning original report.")
         return report_html, qa
-    # Step 2: For each identified snippet, extract it from the report and prepare to correct it.
     soup = BeautifulSoup(report_html, "html.parser")
     updated_report_html = report_html
     corrections_summary = []
     for snippet in identified_snippets:
         snippet = snippet.strip()
-        # Check if the snippet text appears in the report
-        if snippet not in updated_report_html:
-            logging.warning(f"fine_tune_report: The following snippet was not found exactly in the report and will be skipped:\n{snippet}")
             continue
-        # Step 3: For each snippet, prompt the LLM to apply the user-specified correction.
-        prompt_adjust = (f"You are a technical editor. Given the following HTML snippet extracted from a larger report and the user request, "
-                         f"make only the changes necessary to address the instruction. Preserve all existing citations, formatting, and context. "
-                         f"Ensure that the overall style of the report remains consistent with the provided target style and that any new references (if any) "
-                         f"are clearly indicated. Output your answer as a JSON object with two keys: \"improved\" (the corrected HTML snippet) and \"summary\" "
-                         f"(a brief summary of the changes applied).\n\n"
-                         f"Overall Report HTML:\n{report_html}\n\n"
-                         f"Current Snippet to Adjust:\n{snippet}\n\n"
-                         f"User Adjustment Request:\n{adjustment_request}\n\n"
-                         f"Additional Guidance:\nTarget Style: {target_style}\nKnowledge Crumbs: {knowledge_crumbs}\nComplementary Guidance: {complementary_guidance}\n\n"
-                         f"Only output valid JSON.")
         response_adjust = openai_call(prompt=prompt_adjust, model="o3-mini", max_tokens_param=2000, temperature=0.0)
         logging.info(f"fine_tune_report: Raw adjustment response: {response_adjust}")
         try:
             response_adjust = response_adjust.strip().strip("```")
             adjust_data = json.loads(response_adjust)
-            corrected_snippet = adjust_data.get("improved", "").strip()
             snippet_summary = adjust_data.get("summary", "").strip()
         except Exception as e:
             logging.error(f"fine_tune_report: Error parsing snippet adjustment JSON: {e}")
             continue
-        if not corrected_snippet:
-            logging.warning("fine_tune_report: No improved snippet was returned by the LLM; skipping this snippet.")
             continue
-        corrections_summary.append(f"Changes applied to snippet: {snippet_summary}")
-        # Step 4: Replace the original snippet with the improved snippet in the report HTML.
-        updated_report_html = updated_report_html.replace(snippet, corrected_snippet, 1)
-        logging.info("fine_tune_report: Snippet replaced in the report.")
-    # Step 5: Update the reference table. Ask the LLM to review the updated report and generate an updated reference table if needed.
-    prompt_refs = (f"You are a technical editor. Review the following updated report HTML. "
-                   f"If there are any new inline citations (formatted as [x]) that are not in the existing reference table, "
-                   f"generate an updated Reference Summary Table in valid HTML that includes all references. "
-                   f"Output only the HTML code for the updated reference table without any extra commentary.\n\n"
-                   f"Updated Report HTML:\n{updated_report_html}")
     updated_refs = openai_call(prompt=prompt_refs, model="o3-mini", max_tokens_param=1000, temperature=0.5)
     updated_refs = updated_refs.strip().strip("```")
     if updated_refs:
         soup_updated = BeautifulSoup(updated_report_html, "html.parser")
-        # Look for a heading that includes "Reference Summary Table"
         ref_heading = soup_updated.find(lambda tag: tag.name in ["h1", "h2", "h3", "h4"] and "Reference Summary Table" in tag.get_text())
         if ref_heading:
             next_sibling = ref_heading.find_next_sibling()
@@ -287,14 +328,16 @@ def fine_tune_report(adjustment_request: str, openai_api_key: str, serpapi_api_k
                 except Exception as e:
                     logging.error(f"fine_tune_report: Error replacing the reference table: {e}")
             else:
-                logging.info("fine_tune_report: No sibling element found after the reference heading; skipping reference table update.")
             updated_report_html = str(soup_updated)
         else:
-            logging.info("fine_tune_report: No existing reference table heading found; reference update skipped.")
     else:
-        logging.info("fine_tune_report: LLM did not return an updated reference table; leaving original references intact.")
-    # Step 6: Append corrections summary to the Q&A log.
     global_summary = "Corrections Applied Based on User Request:\n" + "\n".join(corrections_summary)
     updated_qa = qa.strip() + "\n----------\n" + global_summary

                      initial_request: str, qa: str, target_style: str, knowledge_crumbs: str,
                      complementary_guidance: str) -> (str, str):
     """
+    Fine-tunes an HTML report based on a user’s correction request by processing complete container elements.
+    Process Overview:
+      1. The function submits the full report HTML along with the user’s adjustment request to the LLM.
+         The prompt instructs the model to output a JSON object containing the minimal unique container(s)
+         (including their outer HTML—e.g. <iframe>, <div>, or <table>) that correspond to the content that needs
+         fixing.
+      2. The identified container snippet(s) are then located in the report using BeautifulSoup.
+      3. For each container, a second LLM call is made to generate a corrected version that integrates the user
+         instructions while maintaining context, citations, and overall style.
+      4. The old container markup is replaced by the corrected version directly in the BeautifulSoup object.
+      5. Finally, if new inline citations have been introduced (beyond those in the reference table), a final LLM
+         call updates the reference table.
+      6. A summary of the corrections is appended to the QA log.
     Parameters:
+      adjustment_request: A string such as "the visual after 'xyz' is not displaying properly, please fix it" or
+                          "the introduction should be more detailed, adjust it" etc.
       openai_api_key: OpenAI API Key.
       serpapi_api_key: SERPAPI API Key.
+      report_html: A string containing the full HTML report.
+      initial_request: The original research query or request.
+      qa: Existing clarification Q&A log.
+      target_style: The stylistic guidelines the report should follow.
+      knowledge_crumbs: Aggregated source/search result content.
+      complementary_guidance: Additional instructions.
     Returns:
+      A tuple (updated_report_html, updated_qa) with the corrected report and updated QA log.
     """
     import os
     import json
     import logging
     from bs4 import BeautifulSoup
+    # Set API keys as environment variables for downstream calls.
     os.environ["OPENAI_API_KEY"] = openai_api_key
     os.environ["SERPAPI_API_KEY"] = serpapi_api_key
     logging.info("fine_tune_report: Starting fine-tuning process based on the adjustment request.")
+    # ---------------------------------------------------------------
+    # Step 1: Identify container snippet(s) needing adjustment.
+    #
+    # The prompt instructs the LLM to scan the full report and output a JSON object
+    # with a key "identified_snippets" that contains complete HTML container elements
+    # (including their outer tags) that uniquely correspond to the section(s) which
+    # should be adjusted per the user request.
+    # ---------------------------------------------------------------
+    prompt_identify = (
+        f"You are a meticulous technical editor. Below is the full report HTML together with a "
+        f"user adjustment request. Identify the minimal, unique container(s) that capture the key content "
+        f"relevant to the user instruction. The containers may be complete HTML elements such as a <div>, <iframe>, "
+        f"<table>, etc. Output a JSON object with the key \"identified_snippets\" that maps to a list of these container "
+        f"HTML snippets ONLY (include the outer tags). No commentary or additional text should be present.\n\n"
+        f"Full Report HTML:\n{report_html}\n\n"
+        f"User Adjustment Request:\n{adjustment_request}\n\n"
+        f"Only output valid JSON."
+    )
     response_identify = openai_call(prompt=prompt_identify, model="o3-mini", max_tokens_param=1500, temperature=0)
     logging.info(f"fine_tune_report: Raw snippet identification response: {response_identify}")
     try:
         response_identify = response_identify.strip().strip("```")
         id_data = json.loads(response_identify)
         logging.error(f"fine_tune_report: Error parsing identified snippets JSON: {e}")
         identified_snippets = []
     if not identified_snippets:
+        logging.warning("fine_tune_report: No specific container snippets were identified for adjustment. Returning original report.")
         return report_html, qa
+    # ---------------------------------------------------------------
+    # Step 2: For each identified container snippet, find it in the report.
+    # ---------------------------------------------------------------
     soup = BeautifulSoup(report_html, "html.parser")
     updated_report_html = report_html
     corrections_summary = []
     for snippet in identified_snippets:
         snippet = snippet.strip()
+        # Use BeautifulSoup to search for a tag whose complete outer HTML contains the snippet.
+        candidate = soup.find(lambda tag: snippet in str(tag))
+        if not candidate:
+            logging.warning(f"fine_tune_report: The snippet could not be uniquely located in the report:\n{snippet}")
             continue
+        original_container_html = str(candidate)
+        logging.info("fine_tune_report: Found container snippet for adjustment.")
+        # ---------------------------------------------------------------
+        # Step 3: Send a prompt to the LLM to correct this container.
+        #
+        # Here the LLM is given the entire current container (the extracted full HTML)
+        # and the full report context (and other guidance) and is asked to produce a corrected
+        # version that applies the adjustment request.
+        # ---------------------------------------------------------------
+        prompt_adjust = (
+            f"You are a technical editor. Given the following HTML container (with its outer tags) "
+            f"extracted from a larger report and based on the user adjustment request, produce a corrected "
+            f"version by making only the changes required. Preserve existing inline citations, formatting, and contextual details. "
+            f"Ensure the updated content remains consistent with the overall report style. Output your answer as a JSON object "
+            f"with exactly two keys: \"improved\" (the corrected container's full HTML) and \"summary\" (a brief explanation of the changes applied).\n\n"
+            f"Overall Report HTML:\n{report_html}\n\n"
+            f"Original Container to Adjust:\n{original_container_html}\n\n"
+            f"User Adjustment Request:\n{adjustment_request}\n\n"
+            f"Additional Guidance:\nTarget Style: {target_style}\nKnowledge Crumbs: {knowledge_crumbs}\nComplementary Guidance: {complementary_guidance}\n\n"
+            f"Only output valid JSON."
+        )
         response_adjust = openai_call(prompt=prompt_adjust, model="o3-mini", max_tokens_param=2000, temperature=0.0)
         logging.info(f"fine_tune_report: Raw adjustment response: {response_adjust}")
         try:
             response_adjust = response_adjust.strip().strip("```")
             adjust_data = json.loads(response_adjust)
+            corrected_container = adjust_data.get("improved", "").strip()
             snippet_summary = adjust_data.get("summary", "").strip()
         except Exception as e:
             logging.error(f"fine_tune_report: Error parsing snippet adjustment JSON: {e}")
             continue
+        if not corrected_container:
+            logging.warning("fine_tune_report: No improved container was returned by the LLM; skipping this snippet.")
             continue
+        corrections_summary.append(f"Container corrected: {snippet_summary}")
+        # ---------------------------------------------------------------
+        # Step 4: Replace the original container in the BeautifulSoup object.
+        # ---------------------------------------------------------------
+        candidate.replace_with(BeautifulSoup(corrected_container, "html.parser"))
+        logging.info("fine_tune_report: Container snippet replaced.")
+    # Get the updated report HTML from the modified soup.
+    updated_report_html = str(soup)
+    # ---------------------------------------------------------------
+    # Step 5: Update the reference table if any new inline citations exist.
+    # ---------------------------------------------------------------
+    prompt_refs = (
+        f"You are a technical editor. Review the following updated report HTML. "
+        f"If there are any new inline citations (e.g., [x]) not present in the original reference table, "
+        f"generate an updated Reference Summary Table as valid HTML containing all references. Output only the HTML code for the updated reference table with no commentary.\n\n"
+        f"Updated Report HTML:\n{updated_report_html}"
+    )
     updated_refs = openai_call(prompt=prompt_refs, model="o3-mini", max_tokens_param=1000, temperature=0.5)
     updated_refs = updated_refs.strip().strip("```")
     if updated_refs:
         soup_updated = BeautifulSoup(updated_report_html, "html.parser")
+        # Look for a heading that includes something like "Reference Summary Table"
         ref_heading = soup_updated.find(lambda tag: tag.name in ["h1", "h2", "h3", "h4"] and "Reference Summary Table" in tag.get_text())
         if ref_heading:
             next_sibling = ref_heading.find_next_sibling()
                 except Exception as e:
                     logging.error(f"fine_tune_report: Error replacing the reference table: {e}")
             else:
+                logging.info("fine_tune_report: No sibling element found after reference heading; skipping reference update.")
             updated_report_html = str(soup_updated)
         else:
+            logging.info("fine_tune_report: No reference table heading found; reference update skipped.")
     else:
+        logging.info("fine_tune_report: No updated reference table returned; leaving references unchanged.")
+    # ---------------------------------------------------------------
+    # Step 6: Append a summary of corrections to the existing QA log.
+    # ---------------------------------------------------------------
     global_summary = "Corrections Applied Based on User Request:\n" + "\n".join(corrections_summary)
     updated_qa = qa.strip() + "\n----------\n" + global_summary