Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -120,7 +120,7 @@ def fine_tune_report(adjustmentguidelines: str, openai_api_key: str, serpapi_api
|
|
| 120 |
os.environ["OPENAI_API_KEY"] = openai_api_key
|
| 121 |
os.environ["SERPAPI_API_KEY"] = serpapi_api_key
|
| 122 |
|
| 123 |
-
# Parse the report HTML.
|
| 124 |
soup = BeautifulSoup(report_html, "html.parser")
|
| 125 |
# Create a working copy of the HTML as a string for exact replacement.
|
| 126 |
updated_report_html = report_html
|
|
@@ -290,16 +290,14 @@ Please output a JSON object with exactly two keys (no extra commentary):
|
|
| 290 |
chunk_summary = chunk_json.get("summary")
|
| 291 |
if improved_chunk and chunk_summary:
|
| 292 |
improvements_summary.append(f"Chunk {idx}: {chunk_summary}")
|
| 293 |
-
#
|
|
|
|
| 294 |
chunk_html_clean = chunk_html.strip()
|
| 295 |
improved_chunk_clean = improved_chunk.strip()
|
| 296 |
if chunk_html_clean in updated_report_html:
|
| 297 |
updated_report_html = updated_report_html.replace(chunk_html_clean, improved_chunk_clean, 1)
|
| 298 |
else:
|
| 299 |
-
logging.warning(f"Chunk {idx}: Exact snippet not found for replacement.
|
| 300 |
-
if soup.body:
|
| 301 |
-
soup.body.append(BeautifulSoup(improved_chunk, "html.parser"))
|
| 302 |
-
updated_report_html = str(soup)
|
| 303 |
else:
|
| 304 |
logging.error(f"Chunk {idx}: Incomplete JSON result: {chunk_result}")
|
| 305 |
except Exception as e:
|
|
@@ -318,21 +316,21 @@ Report HTML:
|
|
| 318 |
updated_references = openai_call(prompt=references_prompt, model="o3-mini", max_tokens_param=1000, temperature=0.5)
|
| 319 |
updated_references = updated_references.strip().strip("```")
|
| 320 |
|
| 321 |
-
# Instead of appending,
|
| 322 |
soup_updated = BeautifulSoup(updated_report_html, "html.parser")
|
| 323 |
ref_heading = soup_updated.find(lambda tag: tag.name == "h1" and "Reference Summary Table" in tag.get_text())
|
| 324 |
if ref_heading:
|
| 325 |
-
#
|
| 326 |
next_sibling = ref_heading.find_next_sibling()
|
| 327 |
if next_sibling:
|
| 328 |
new_ref_html = BeautifulSoup(updated_references, "html.parser")
|
| 329 |
next_sibling.replace_with(new_ref_html)
|
| 330 |
-
# Update the working HTML string
|
| 331 |
updated_report_html = str(soup_updated)
|
| 332 |
else:
|
| 333 |
-
#
|
| 334 |
logging.info("No existing reference table found; skipping reference replacement.")
|
| 335 |
|
|
|
|
| 336 |
summary_text = "Summary of Fine-Tuning Improvements:\n" + "\n".join(improvements_summary)
|
| 337 |
global_summary = "Combined Chunk Improvement Guidelines:\n" + "\n".join(all_guidelines)
|
| 338 |
updated_qa = qa.strip() + "\n----------\n" + global_summary + "\n" + summary_text
|
|
@@ -1692,7 +1690,7 @@ Important:
|
|
| 1692 |
- use p tag for the source and source reference number
|
| 1693 |
- after [[ put "Visual Placeholder n:" explicitly (with n as the ref number of the visual box created). This will be used in a regex
|
| 1694 |
- the only types of mermaid diagram that can be generated are: flowchart, sequence, gantt, pie, mindmap (no charts) // Take this into consideration when providing the instructions for the diagram
|
| 1695 |
-
- do not make
|
| 1696 |
- in the placeholder, no need to add the references to the source or its ref number, but make sure ALL of the data points required has a source from the learning and reference material hereafter
|
| 1697 |
- these placeholders text should contain:
|
| 1698 |
o the purpose of the future visual
|
|
@@ -1762,6 +1760,7 @@ Important:
|
|
| 1762 |
- after [[ put "Focus Placeholder n:" explicitly (with n as the ref number of the focus box created). This will be used in a regex
|
| 1763 |
- Do not add a title for the Focus placeholder just before the [[...]], the content that will replace the focus placeholder - generated later on - will already include a title
|
| 1764 |
- For the Table of contents: do not mention the pages, but make each item on separate line
|
|
|
|
| 1765 |
|
| 1766 |
// Structure of the overall report:
|
| 1767 |
- Abstract
|
|
|
|
| 120 |
os.environ["OPENAI_API_KEY"] = openai_api_key
|
| 121 |
os.environ["SERPAPI_API_KEY"] = serpapi_api_key
|
| 122 |
|
| 123 |
+
# Parse the original report HTML.
|
| 124 |
soup = BeautifulSoup(report_html, "html.parser")
|
| 125 |
# Create a working copy of the HTML as a string for exact replacement.
|
| 126 |
updated_report_html = report_html
|
|
|
|
| 290 |
chunk_summary = chunk_json.get("summary")
|
| 291 |
if improved_chunk and chunk_summary:
|
| 292 |
improvements_summary.append(f"Chunk {idx}: {chunk_summary}")
|
| 293 |
+
# Attempt to replace the old chunk with the improved chunk.
|
| 294 |
+
# Use string 'strip()' to remove any surrounding whitespace.
|
| 295 |
chunk_html_clean = chunk_html.strip()
|
| 296 |
improved_chunk_clean = improved_chunk.strip()
|
| 297 |
if chunk_html_clean in updated_report_html:
|
| 298 |
updated_report_html = updated_report_html.replace(chunk_html_clean, improved_chunk_clean, 1)
|
| 299 |
else:
|
| 300 |
+
logging.warning(f"Chunk {idx}: Exact snippet not found for replacement. Replacement not applied.")
|
|
|
|
|
|
|
|
|
|
| 301 |
else:
|
| 302 |
logging.error(f"Chunk {idx}: Incomplete JSON result: {chunk_result}")
|
| 303 |
except Exception as e:
|
|
|
|
| 316 |
updated_references = openai_call(prompt=references_prompt, model="o3-mini", max_tokens_param=1000, temperature=0.5)
|
| 317 |
updated_references = updated_references.strip().strip("```")
|
| 318 |
|
| 319 |
+
# Instead of appending, look for a references section and replace its content.
|
| 320 |
soup_updated = BeautifulSoup(updated_report_html, "html.parser")
|
| 321 |
ref_heading = soup_updated.find(lambda tag: tag.name == "h1" and "Reference Summary Table" in tag.get_text())
|
| 322 |
if ref_heading:
|
| 323 |
+
# Assume that the reference table is the next sibling.
|
| 324 |
next_sibling = ref_heading.find_next_sibling()
|
| 325 |
if next_sibling:
|
| 326 |
new_ref_html = BeautifulSoup(updated_references, "html.parser")
|
| 327 |
next_sibling.replace_with(new_ref_html)
|
|
|
|
| 328 |
updated_report_html = str(soup_updated)
|
| 329 |
else:
|
| 330 |
+
# No reference section found; do nothing.
|
| 331 |
logging.info("No existing reference table found; skipping reference replacement.")
|
| 332 |
|
| 333 |
+
# Do not append anything after the references.
|
| 334 |
summary_text = "Summary of Fine-Tuning Improvements:\n" + "\n".join(improvements_summary)
|
| 335 |
global_summary = "Combined Chunk Improvement Guidelines:\n" + "\n".join(all_guidelines)
|
| 336 |
updated_qa = qa.strip() + "\n----------\n" + global_summary + "\n" + summary_text
|
|
|
|
| 1690 |
- use p tag for the source and source reference number
|
| 1691 |
- after [[ put "Visual Placeholder n:" explicitly (with n as the ref number of the visual box created). This will be used in a regex
|
| 1692 |
- the only types of mermaid diagram that can be generated are: flowchart, sequence, gantt, pie, mindmap (no charts) // Take this into consideration when providing the instructions for the diagram
|
| 1693 |
+
- do not refer to "visual placeholders" in the report; just mention the visual and its number.
|
| 1694 |
- in the placeholder, no need to add the references to the source or its ref number, but make sure ALL of the data points required has a source from the learning and reference material hereafter
|
| 1695 |
- these placeholders text should contain:
|
| 1696 |
o the purpose of the future visual
|
|
|
|
| 1760 |
- after [[ put "Focus Placeholder n:" explicitly (with n as the ref number of the focus box created). This will be used in a regex
|
| 1761 |
- Do not add a title for the Focus placeholder just before the [[...]], the content that will replace the focus placeholder - generated later on - will already include a title
|
| 1762 |
- For the Table of contents: do not mention the pages, but make each item on separate line
|
| 1763 |
+
- The reference table at the end containing the citation details should have 4 columns: the ref number, the title of the document, the author(s), and the URL (with hyperlink)
|
| 1764 |
|
| 1765 |
// Structure of the overall report:
|
| 1766 |
- Abstract
|