Guiyom committed on
Commit
0192ae5
·
verified ·
1 Parent(s): 4cae79c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -21
app.py CHANGED
@@ -1004,6 +1004,27 @@ def improve_report_from_chat(user_message: str, chat_history: list, report_text:
1004
 
1005
  # ============================================================================= Expand
1006
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1007
  def expand_report(expansion_request: str, openai_api_key: str, serpapi_api_key: str, report_html: str,
1008
  initial_request: str, qa: str, target_style: str, knowledge_crumbs: str,
1009
  complementary_guidance: str) -> (str, str):
@@ -1055,6 +1076,13 @@ Only output valid JSON.
1055
  logging.warning("expansion_report: No unique strings were identified for adjustment. Returning original report.")
1056
  return report_html, qa
1057
 
 
 
 
 
 
 
 
1058
  # Step 2: Parse the report HTML once.
1059
  soup = BeautifulSoup(report_html, "html.parser")
1060
  corrections_summary = []
@@ -1084,13 +1112,19 @@ Only output valid JSON.
1084
  logging.info("expansion_report: Found container for unique string adjustment:\n\n%s\n", original_container_html)
1085
 
1086
  # Step 3: Adjust the container by asking the LLM to expand the content.
1087
- # Note the explicit instruction regarding inline citations.
 
 
1088
  prompt_adjust = (f"""
1089
  You are a technical editor.
1090
  Given the following HTML container (including its outer tags) extracted from a larger report and based on the user expansion request, produce an expanded version by elaborating on the content.
1091
- Preserve all inline citations (formatted as [x]) and ensure that if you add any new citations, they are consistent with sources that must also be reflected in the final References Summary Table.
1092
- Skip lines occasionally to improve readability.
1093
- The expanded version will be put back in the exact same location and must maintain the outer HTML tags.
 
 
 
 
1094
 
1095
  - Overall Report HTML:
1096
  {report_html}
@@ -1108,48 +1142,71 @@ Additional Guidance:
1108
  - Complementary Guidance:
1109
  {complementary_guidance}
1110
 
1111
- Ensure that any inline citation (e.g., [1], [2], etc.) within the expanded content is preserved or newly included so they can all be captured in the final References Summary Table.
1112
-
1113
- Output a JSON object with exactly two keys:
1114
- - "expanded" (the expanded container's full HTML)
1115
- - "summary" (a brief explanation of the changes, including citation updates if applicable)
1116
 
1117
- Only output valid JSON with no comments or code fences.
 
1118
  """)
1119
  response_adjust = llm_call(prompt=prompt_adjust, model="o3-mini", temperature=0, max_tokens_param=10000)
1120
  logging.info("expansion_report: Raw container adjustment response: %s", response_adjust)
1121
  try:
1122
  response_adjust = response_adjust.strip().strip("json").strip("```").strip()
1123
  logging.info("Cleaned container adjustment response: %s", response_adjust)
1124
- adjust_data = json.loads(response_adjust)
1125
- corrected_container = adjust_data.get("expanded", "").strip()
1126
- container_summary = adjust_data.get("summary", "").strip()
 
 
 
 
 
 
 
1127
  except Exception as e:
1128
  logging.error("expansion_report: Error parsing container adjustment JSON: %s", e)
1129
  continue
1130
 
1131
- if not corrected_container:
1132
- logging.warning("expansion_report: No expanded container was generated; skipping correction for this container.")
1133
- continue
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1134
 
1135
- corrections_summary.append(f"Container expanded: {container_summary}")
1136
 
1137
- # Step 4: Replace the original container with the updated version.
1138
  container_tag.replace_with(BeautifulSoup(corrected_container, "html.parser"))
1139
  logging.info("expansion_report: Updated container re-injected.")
1140
 
1141
  updated_report_html = str(soup)
1142
 
1143
  # Step 5 (and 6): Update the References Summary Table.
 
 
1144
  prompt_refs = (
1145
  f"\nYou are a technical editor.\n\n"
1146
  "Review the following updated report HTML. If any new inline citations (e.g., [x]) have been added that are not in the original reference table, "
1147
- "generate an updated References Summary Table as valid HTML."
1148
- " Every inline citation found in the report must have a corresponding entry in this table. "
 
 
 
 
1149
  "Output only the updated table without any additional comments.\n\n"
1150
  f"Updated Report HTML:\n{updated_report_html}"
1151
  )
1152
- # Increase token limit to ensure full output.
1153
  updated_refs = llm_call(prompt=prompt_refs, model="o3-mini", temperature=0, max_tokens_param=10000)
1154
  updated_refs = updated_refs.strip().strip("```").strip()
1155
 
 
1004
 
1005
  # ============================================================================= Expand
1006
 
1007
def get_max_reference(report_html: str) -> int:
    """
    Return the highest reference number listed in the References Summary Table.

    The table is located by finding the first h1-h4 heading whose text contains
    "references summary table" (case-insensitive) and inspecting the element
    that immediately follows that heading. Two layouts are recognised:

    * pipe-delimited text, where an entry line starts with "<number> |";
    * an HTML table, where the first cell of a row holds only the number,
      optionally wrapped in square brackets or followed by a dot
      (e.g. "12", "[12]" or "12.").

    Args:
        report_html: Full HTML of the report.

    Returns:
        The largest reference number found, or 0 when the input is empty, the
        heading or its following element is missing, or no numbered entry
        could be parsed.
    """
    if not report_html:
        return 0

    soup_ = BeautifulSoup(report_html, "html.parser")

    # Locate a heading that includes "references summary table" (case insensitive).
    ref_heading = soup_.find(
        lambda tag: tag.name in ["h1", "h2", "h3", "h4"]
        and "references summary table" in tag.get_text(strip=True).lower()
    )
    if ref_heading is None:
        return 0

    ref_block = ref_heading.find_next_sibling()
    if ref_block is None:
        return 0

    max_ref = 0

    # Layout 1: plain text rows such as "3 | name | author | url".
    pipe_row = re.compile(r'\s*(\d+)\s*\|')
    for line in ref_block.get_text(separator="\n").splitlines():
        m = pipe_row.match(line)
        if m:
            max_ref = max(max_ref, int(m.group(1)))

    # Layout 2: a real HTML table. get_text(separator="\n") puts every cell on
    # its own line, so the pipe pattern above cannot match; read the first
    # cell of each row instead. Header rows are skipped naturally because
    # their first cell is not purely numeric.
    cell_number = re.compile(r'\[?(\d+)\]?\.?')
    for row in ref_block.find_all("tr"):
        first_cell = row.find(["td", "th"])
        if first_cell is None:
            continue
        m = cell_number.fullmatch(first_cell.get_text(strip=True))
        if m:
            max_ref = max(max_ref, int(m.group(1)))

    return max_ref
1027
+
1028
  def expand_report(expansion_request: str, openai_api_key: str, serpapi_api_key: str, report_html: str,
1029
  initial_request: str, qa: str, target_style: str, knowledge_crumbs: str,
1030
  complementary_guidance: str) -> (str, str):
 
1076
  logging.warning("expansion_report: No unique strings were identified for adjustment. Returning original report.")
1077
  return report_html, qa
1078
 
1079
+ # Determine the current maximum reference number in the report.
1080
+ current_max_ref = get_max_reference(report_html)
1081
+ logging.info(f"expansion_report: Current max reference number is {current_max_ref}")
1082
+
1083
+ # Prepare to accumulate newly added references across all expansions
1084
+ new_references_list = []
1085
+
1086
  # Step 2: Parse the report HTML once.
1087
  soup = BeautifulSoup(report_html, "html.parser")
1088
  corrections_summary = []
 
1112
  logging.info("expansion_report: Found container for unique string adjustment:\n\n%s\n", original_container_html)
1113
 
1114
  # Step 3: Adjust the container by asking the LLM to expand the content.
1115
+ # The response is expected to include two parts separated by a line of ten hyphens:
1116
+ # - The expanded HTML snippet (to be reintegrated)
1117
+ # - New reference lines in the format: "# | name | author | url" (or blank if no new references)
1118
  prompt_adjust = (f"""
1119
  You are a technical editor.
1120
  Given the following HTML container (including its outer tags) extracted from a larger report and based on the user expansion request, produce an expanded version by elaborating on the content.
1121
+ Preserve all inline citations (formatted as [x]) and, if you add any new citations, output them in a separate section.
1122
+ The output should be two parts separated by a single newline containing exactly ten hyphens (i.e., "----------").
1123
+ The first part is the expanded container’s full HTML (including its outer tags) to be reinserted as-is.
1124
+ The second part is a list (one per line) of any new references in the format:
1125
+ new reference number | name | author | url
1126
+ The new reference number should be the one you wish to assign for the new references.
1127
+ If no new references have been added, leave the second part blank.
1128
 
1129
  - Overall Report HTML:
1130
  {report_html}
 
1142
  - Complementary Guidance:
1143
  {complementary_guidance}
1144
 
1145
+ Ensure that any inline citation (e.g., [1], [2], etc.) within the expanded content is preserved or newly included so that they can all be captured in the final References Summary Table.
 
 
 
 
1146
 
1147
+ Output a JSON object with exactly one key "result" whose value is a string containing the two parts as specified (the expanded container, then a newline with "----------", then new reference lines).
1148
+ Only output valid JSON with no additional commentary or code fences.
1149
  """)
1150
  response_adjust = llm_call(prompt=prompt_adjust, model="o3-mini", temperature=0, max_tokens_param=10000)
1151
  logging.info("expansion_report: Raw container adjustment response: %s", response_adjust)
1152
  try:
1153
  response_adjust = response_adjust.strip().strip("json").strip("```").strip()
1154
  logging.info("Cleaned container adjustment response: %s", response_adjust)
1155
+
1156
+ # Parse the response: Split into two parts using the separator line "----------"
1157
+ parts = response_adjust.split("\n----------\n")
1158
+ if len(parts) == 2:
1159
+ corrected_container = parts[0].strip()
1160
+ new_refs_str = parts[1].strip()
1161
+ else:
1162
+ # Fallback in case no separator was found.
1163
+ corrected_container = response_adjust
1164
+ new_refs_str = ""
1165
  except Exception as e:
1166
  logging.error("expansion_report: Error parsing container adjustment JSON: %s", e)
1167
  continue
1168
 
1169
+ # If new references exist, process and update their reference numbers.
1170
+ if new_refs_str:
1171
+ for line in new_refs_str.splitlines():
1172
+ line = line.strip()
1173
+ if line:
1174
+ ref_parts = line.split("|")
1175
+ if len(ref_parts) >= 4:
1176
+ # Reassign a new reference number by incrementing current_max_ref.
1177
+ current_max_ref += 1
1178
+ ref_name = ref_parts[1].strip()
1179
+ ref_author = ref_parts[2].strip()
1180
+ ref_url = ref_parts[3].strip()
1181
+ new_ref_line = f"{current_max_ref} | {ref_name} | {ref_author} | {ref_url}"
1182
+ new_references_list.append(new_ref_line)
1183
+ logging.info("expansion_report: Added new reference: %s", new_ref_line)
1184
+ else:
1185
+ logging.info("expansion_report: No new references found for this container.")
1186
 
1187
+ corrections_summary.append("Container expanded and references updated if applicable.")
1188
 
1189
+ # Step 4: Replace the original container with the updated version (only the expanded snippet).
1190
  container_tag.replace_with(BeautifulSoup(corrected_container, "html.parser"))
1191
  logging.info("expansion_report: Updated container re-injected.")
1192
 
1193
  updated_report_html = str(soup)
1194
 
1195
  # Step 5 (and 6): Update the References Summary Table.
1196
+ # Prepare new references text, if any.
1197
+ new_refs_text = "\n".join(new_references_list) if new_references_list else ""
1198
  prompt_refs = (
1199
  f"\nYou are a technical editor.\n\n"
1200
  "Review the following updated report HTML. If any new inline citations (e.g., [x]) have been added that are not in the original reference table, "
1201
+ "generate an updated References Summary Table as valid HTML.\n"
1202
+ "Every inline citation found in the report must have a corresponding entry in this table.\n"
1203
+ "Use the following details for new references:\n"
1204
+ f"Current max reference number: {current_max_ref}\n"
1205
+ "New References (format: number | name | author | url):\n"
1206
+ f"{new_refs_text if new_refs_text else 'None'}\n\n"
1207
  "Output only the updated table without any additional comments.\n\n"
1208
  f"Updated Report HTML:\n{updated_report_html}"
1209
  )
 
1210
  updated_refs = llm_call(prompt=prompt_refs, model="o3-mini", temperature=0, max_tokens_param=10000)
1211
  updated_refs = updated_refs.strip().strip("```").strip()
1212