Spaces:

10gen
/

deepsearchitv2

Running

App Files Files Community

Guiyom committed on Feb 26, 2025

Commit

30d9711

verified ·

1 Parent(s): fc071dc

Update app.py

Browse files

Files changed (1) hide show

app.py +10 -11

app.py CHANGED Viewed

@@ -120,7 +120,7 @@ def fine_tune_report(adjustmentguidelines: str, openai_api_key: str, serpapi_api
     os.environ["OPENAI_API_KEY"] = openai_api_key
     os.environ["SERPAPI_API_KEY"] = serpapi_api_key
-    # Parse the report HTML.
     soup = BeautifulSoup(report_html, "html.parser")
     # Create a working copy of the HTML as a string for exact replacement.
     updated_report_html = report_html
@@ -290,16 +290,14 @@ Please output a JSON object with exactly two keys (no extra commentary):
             chunk_summary = chunk_json.get("summary")
             if improved_chunk and chunk_summary:
                 improvements_summary.append(f"Chunk {idx}: {chunk_summary}")
-                # Perform a direct, exact string replacement on the working HTML.
                 chunk_html_clean = chunk_html.strip()
                 improved_chunk_clean = improved_chunk.strip()
                 if chunk_html_clean in updated_report_html:
                     updated_report_html = updated_report_html.replace(chunk_html_clean, improved_chunk_clean, 1)
                 else:
-                    logging.warning(f"Chunk {idx}: Exact snippet not found for replacement. Appending improved snippet to end.")
-                    if soup.body:
-                        soup.body.append(BeautifulSoup(improved_chunk, "html.parser"))
-                        updated_report_html = str(soup)
             else:
                 logging.error(f"Chunk {idx}: Incomplete JSON result: {chunk_result}")
         except Exception as e:
@@ -318,21 +316,21 @@ Report HTML:
     updated_references = openai_call(prompt=references_prompt, model="o3-mini", max_tokens_param=1000, temperature=0.5)
     updated_references = updated_references.strip().strip("```")
-    # Instead of appending, check if a references section exists and replace its content.
     soup_updated = BeautifulSoup(updated_report_html, "html.parser")
     ref_heading = soup_updated.find(lambda tag: tag.name == "h1" and "Reference Summary Table" in tag.get_text())
     if ref_heading:
-        # Find the next sibling (which should be the references table or container)
         next_sibling = ref_heading.find_next_sibling()
         if next_sibling:
             new_ref_html = BeautifulSoup(updated_references, "html.parser")
             next_sibling.replace_with(new_ref_html)
-        # Update the working HTML string
         updated_report_html = str(soup_updated)
     else:
-        # If no reference section is found, do nothing.
         logging.info("No existing reference table found; skipping reference replacement.")
     summary_text = "Summary of Fine-Tuning Improvements:\n" + "\n".join(improvements_summary)
     global_summary = "Combined Chunk Improvement Guidelines:\n" + "\n".join(all_guidelines)
     updated_qa = qa.strip() + "\n----------\n" + global_summary + "\n" + summary_text
@@ -1692,7 +1690,7 @@ Important:
 - use p tag for the source and source reference number
 - after [[ put "Visual Placeholder n:" explicitly (with n as the ref number of the visual box created). This will be used in a regex
 - the only types of mermaid diagram that can be generated are: flowchart, sequence, gantt, pie, mindmap (no charts) // Take this into consideration when providing the instructions for the diagram
-- do not make reference in the report to "visual placeholders" just mention the visual and the number..
 - in the placeholder, no need to add the references to the source or its ref number, but make sure ALL of the data points required has a source from the learning and reference material hereafter
 - these placeholders text should contain:
     o the purpose of the future visual
@@ -1762,6 +1760,7 @@ Important:
 - after [[ put "Focus Placeholder n:" explicitly (with n as the ref number of the focus box created). This will be used in a regex
 - Do not add a title for the Focus placeholder just before the [[...]], the content that will replace the focus placeholder - generated later on - will already include a title
 - For the Table of contents: do not mention the pages, but make each item on separate line
 // Structure of the overall report:
 - Abstract

     os.environ["OPENAI_API_KEY"] = openai_api_key
     os.environ["SERPAPI_API_KEY"] = serpapi_api_key
+    # Parse the original report HTML.
     soup = BeautifulSoup(report_html, "html.parser")
     # Create a working copy of the HTML as a string for exact replacement.
     updated_report_html = report_html
             chunk_summary = chunk_json.get("summary")
             if improved_chunk and chunk_summary:
                 improvements_summary.append(f"Chunk {idx}: {chunk_summary}")
+                # Attempt to replace the old chunk with the improved chunk.
+                # Use string 'strip()' to remove any surrounding whitespace.
                 chunk_html_clean = chunk_html.strip()
                 improved_chunk_clean = improved_chunk.strip()
                 if chunk_html_clean in updated_report_html:
                     updated_report_html = updated_report_html.replace(chunk_html_clean, improved_chunk_clean, 1)
                 else:
+                    logging.warning(f"Chunk {idx}: Exact snippet not found for replacement. Replacement not applied.")
             else:
                 logging.error(f"Chunk {idx}: Incomplete JSON result: {chunk_result}")
         except Exception as e:
     updated_references = openai_call(prompt=references_prompt, model="o3-mini", max_tokens_param=1000, temperature=0.5)
     updated_references = updated_references.strip().strip("```")
+    # Instead of appending, look for a references section and replace its content.
     soup_updated = BeautifulSoup(updated_report_html, "html.parser")
     ref_heading = soup_updated.find(lambda tag: tag.name == "h1" and "Reference Summary Table" in tag.get_text())
     if ref_heading:
+        # Assume that the reference table is the next sibling.
         next_sibling = ref_heading.find_next_sibling()
         if next_sibling:
             new_ref_html = BeautifulSoup(updated_references, "html.parser")
             next_sibling.replace_with(new_ref_html)
         updated_report_html = str(soup_updated)
     else:
+        # No reference section found; do nothing.
         logging.info("No existing reference table found; skipping reference replacement.")
+    # Do not append anything after the references.
     summary_text = "Summary of Fine-Tuning Improvements:\n" + "\n".join(improvements_summary)
     global_summary = "Combined Chunk Improvement Guidelines:\n" + "\n".join(all_guidelines)
     updated_qa = qa.strip() + "\n----------\n" + global_summary + "\n" + summary_text
 - use p tag for the source and source reference number
 - after [[ put "Visual Placeholder n:" explicitly (with n as the ref number of the visual box created). This will be used in a regex
 - the only types of mermaid diagram that can be generated are: flowchart, sequence, gantt, pie, mindmap (no charts) // Take this into consideration when providing the instructions for the diagram
+- do not mention "visual placeholders" in the report; just mention the visual and its number.
 - in the placeholder, no need to add the references to the source or its ref number, but make sure ALL of the data points required has a source from the learning and reference material hereafter
 - these placeholders text should contain:
     o the purpose of the future visual
 - after [[ put "Focus Placeholder n:" explicitly (with n as the ref number of the focus box created). This will be used in a regex
 - Do not add a title for the Focus placeholder just before the [[...]], the content that will replace the focus placeholder - generated later on - will already include a title
 - For the Table of contents: do not mention the pages, but make each item on separate line
+- The reference table at the end containing the citation details should have 4 columns: the ref number, the title of the document, the author(s), and the URL (with hyperlink)
 // Structure of the overall report:
 - Abstract