Guiyom committed on
Commit
30d9711
·
verified ·
1 Parent(s): fc071dc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -11
app.py CHANGED
@@ -120,7 +120,7 @@ def fine_tune_report(adjustmentguidelines: str, openai_api_key: str, serpapi_api
120
  os.environ["OPENAI_API_KEY"] = openai_api_key
121
  os.environ["SERPAPI_API_KEY"] = serpapi_api_key
122
 
123
- # Parse the report HTML.
124
  soup = BeautifulSoup(report_html, "html.parser")
125
  # Create a working copy of the HTML as a string for exact replacement.
126
  updated_report_html = report_html
@@ -290,16 +290,14 @@ Please output a JSON object with exactly two keys (no extra commentary):
290
  chunk_summary = chunk_json.get("summary")
291
  if improved_chunk and chunk_summary:
292
  improvements_summary.append(f"Chunk {idx}: {chunk_summary}")
293
- # Perform a direct, exact string replacement on the working HTML.
 
294
  chunk_html_clean = chunk_html.strip()
295
  improved_chunk_clean = improved_chunk.strip()
296
  if chunk_html_clean in updated_report_html:
297
  updated_report_html = updated_report_html.replace(chunk_html_clean, improved_chunk_clean, 1)
298
  else:
299
- logging.warning(f"Chunk {idx}: Exact snippet not found for replacement. Appending improved snippet to end.")
300
- if soup.body:
301
- soup.body.append(BeautifulSoup(improved_chunk, "html.parser"))
302
- updated_report_html = str(soup)
303
  else:
304
  logging.error(f"Chunk {idx}: Incomplete JSON result: {chunk_result}")
305
  except Exception as e:
@@ -318,21 +316,21 @@ Report HTML:
318
  updated_references = openai_call(prompt=references_prompt, model="o3-mini", max_tokens_param=1000, temperature=0.5)
319
  updated_references = updated_references.strip().strip("```")
320
 
321
- # Instead of appending, check if a references section exists and replace its content.
322
  soup_updated = BeautifulSoup(updated_report_html, "html.parser")
323
  ref_heading = soup_updated.find(lambda tag: tag.name == "h1" and "Reference Summary Table" in tag.get_text())
324
  if ref_heading:
325
- # Find the next sibling (which should be the references table or container)
326
  next_sibling = ref_heading.find_next_sibling()
327
  if next_sibling:
328
  new_ref_html = BeautifulSoup(updated_references, "html.parser")
329
  next_sibling.replace_with(new_ref_html)
330
- # Update the working HTML string
331
  updated_report_html = str(soup_updated)
332
  else:
333
- # If no reference section is found, do nothing.
334
  logging.info("No existing reference table found; skipping reference replacement.")
335
 
 
336
  summary_text = "Summary of Fine-Tuning Improvements:\n" + "\n".join(improvements_summary)
337
  global_summary = "Combined Chunk Improvement Guidelines:\n" + "\n".join(all_guidelines)
338
  updated_qa = qa.strip() + "\n----------\n" + global_summary + "\n" + summary_text
@@ -1692,7 +1690,7 @@ Important:
1692
  - use p tag for the source and source reference number
1693
  - after [[ put "Visual Placeholder n:" explicitly (with n as the ref number of the visual box created). This will be used in a regex
1694
  - the only types of mermaid diagram that can be generated are: flowchart, sequence, gantt, pie, mindmap (no charts) // Take this into consideration when providing the instructions for the diagram
1695
- - do not make reference in the report to "visual placeholders" just mention the visual and the number..
1696
  - in the placeholder, no need to add the references to the source or its ref number, but make sure ALL of the data points required has a source from the learning and reference material hereafter
1697
  - these placeholders text should contain:
1698
  o the purpose of the future visual
@@ -1762,6 +1760,7 @@ Important:
1762
  - after [[ put "Focus Placeholder n:" explicitly (with n as the ref number of the focus box created). This will be used in a regex
1763
  - Do not add a title for the Focus placeholder just before the [[...]], the content that will replace the focus placeholder - generated later on - will already include a title
1764
  - For the Table of contents: do not mention the pages, but make each item on separate line
 
1765
 
1766
  // Structure of the overall report:
1767
  - Abstract
 
120
  os.environ["OPENAI_API_KEY"] = openai_api_key
121
  os.environ["SERPAPI_API_KEY"] = serpapi_api_key
122
 
123
+ # Parse the original report HTML.
124
  soup = BeautifulSoup(report_html, "html.parser")
125
  # Create a working copy of the HTML as a string for exact replacement.
126
  updated_report_html = report_html
 
290
  chunk_summary = chunk_json.get("summary")
291
  if improved_chunk and chunk_summary:
292
  improvements_summary.append(f"Chunk {idx}: {chunk_summary}")
293
+ # Attempt to replace the old chunk with the improved chunk.
294
+ # Use string 'strip()' to remove any surrounding whitespace.
295
  chunk_html_clean = chunk_html.strip()
296
  improved_chunk_clean = improved_chunk.strip()
297
  if chunk_html_clean in updated_report_html:
298
  updated_report_html = updated_report_html.replace(chunk_html_clean, improved_chunk_clean, 1)
299
  else:
300
+ logging.warning(f"Chunk {idx}: Exact snippet not found for replacement. Replacement not applied.")
 
 
 
301
  else:
302
  logging.error(f"Chunk {idx}: Incomplete JSON result: {chunk_result}")
303
  except Exception as e:
 
316
  updated_references = openai_call(prompt=references_prompt, model="o3-mini", max_tokens_param=1000, temperature=0.5)
317
  updated_references = updated_references.strip().strip("```")
318
 
319
+ # Instead of appending, look for a references section and replace its content.
320
  soup_updated = BeautifulSoup(updated_report_html, "html.parser")
321
  ref_heading = soup_updated.find(lambda tag: tag.name == "h1" and "Reference Summary Table" in tag.get_text())
322
  if ref_heading:
323
+ # Assume that the reference table is the next sibling.
324
  next_sibling = ref_heading.find_next_sibling()
325
  if next_sibling:
326
  new_ref_html = BeautifulSoup(updated_references, "html.parser")
327
  next_sibling.replace_with(new_ref_html)
 
328
  updated_report_html = str(soup_updated)
329
  else:
330
+ # No reference section found; do nothing.
331
  logging.info("No existing reference table found; skipping reference replacement.")
332
 
333
+ # Do not append anything after the references.
334
  summary_text = "Summary of Fine-Tuning Improvements:\n" + "\n".join(improvements_summary)
335
  global_summary = "Combined Chunk Improvement Guidelines:\n" + "\n".join(all_guidelines)
336
  updated_qa = qa.strip() + "\n----------\n" + global_summary + "\n" + summary_text
 
1690
  - use p tag for the source and source reference number
1691
  - after [[ put "Visual Placeholder n:" explicitly (with n as the ref number of the visual box created). This will be used in a regex
1692
  - the only types of mermaid diagram that can be generated are: flowchart, sequence, gantt, pie, mindmap (no charts) // Take this into consideration when providing the instructions for the diagram
1693
+ - do not mention "visual placeholders" in the report; just refer to the visual and its number.
1694
  - in the placeholder, no need to add the references to the source or its ref number, but make sure ALL of the data points required has a source from the learning and reference material hereafter
1695
  - these placeholders text should contain:
1696
  o the purpose of the future visual
 
1760
  - after [[ put "Focus Placeholder n:" explicitly (with n as the ref number of the focus box created). This will be used in a regex
1761
  - Do not add a title for the Focus placeholder just before the [[...]], the content that will replace the focus placeholder - generated later on - will already include a title
1762
  - For the Table of contents: do not mention the pages, but make each item on separate line
1763
+ - The reference table at the end containing the citation details should have 4 columns: the ref number, the title of the document, the author(s), and the URL (with hyperlink)
1764
 
1765
  // Structure of the overall report:
1766
  - Abstract