Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -1004,6 +1004,27 @@ def improve_report_from_chat(user_message: str, chat_history: list, report_text:
|
|
| 1004 |
|
| 1005 |
# ============================================================================= Expand
|
| 1006 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1007 |
def expand_report(expansion_request: str, openai_api_key: str, serpapi_api_key: str, report_html: str,
|
| 1008 |
initial_request: str, qa: str, target_style: str, knowledge_crumbs: str,
|
| 1009 |
complementary_guidance: str) -> (str, str):
|
|
@@ -1055,6 +1076,13 @@ Only output valid JSON.
|
|
| 1055 |
logging.warning("expansion_report: No unique strings were identified for adjustment. Returning original report.")
|
| 1056 |
return report_html, qa
|
| 1057 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1058 |
# Step 2: Parse the report HTML once.
|
| 1059 |
soup = BeautifulSoup(report_html, "html.parser")
|
| 1060 |
corrections_summary = []
|
|
@@ -1084,13 +1112,19 @@ Only output valid JSON.
|
|
| 1084 |
logging.info("expansion_report: Found container for unique string adjustment:\n\n%s\n", original_container_html)
|
| 1085 |
|
| 1086 |
# Step 3: Adjust the container by asking the LLM to expand the content.
|
| 1087 |
-
#
|
|
|
|
|
|
|
| 1088 |
prompt_adjust = (f"""
|
| 1089 |
You are a technical editor.
|
| 1090 |
Given the following HTML container (including its outer tags) extracted from a larger report and based on the user expansion request, produce an expanded version by elaborating on the content.
|
| 1091 |
-
Preserve all inline citations (formatted as [x]) and
|
| 1092 |
-
|
| 1093 |
-
The
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1094 |
|
| 1095 |
- Overall Report HTML:
|
| 1096 |
{report_html}
|
|
@@ -1108,48 +1142,71 @@ Additional Guidance:
|
|
| 1108 |
- Complementary Guidance:
|
| 1109 |
{complementary_guidance}
|
| 1110 |
|
| 1111 |
-
Ensure that any inline citation (e.g., [1], [2], etc.) within the expanded content is preserved or newly included so they can all be captured in the final References Summary Table.
|
| 1112 |
-
|
| 1113 |
-
Output a JSON object with exactly two keys:
|
| 1114 |
-
- "expanded" (the expanded container's full HTML)
|
| 1115 |
-
- "summary" (a brief explanation of the changes, including citation updates if applicable)
|
| 1116 |
|
| 1117 |
-
|
|
|
|
| 1118 |
""")
|
| 1119 |
response_adjust = llm_call(prompt=prompt_adjust, model="o3-mini", temperature=0, max_tokens_param=10000)
|
| 1120 |
logging.info("expansion_report: Raw container adjustment response: %s", response_adjust)
|
| 1121 |
try:
|
| 1122 |
response_adjust = response_adjust.strip().strip("json").strip("```").strip()
|
| 1123 |
logging.info("Cleaned container adjustment response: %s", response_adjust)
|
| 1124 |
-
|
| 1125 |
-
|
| 1126 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1127 |
except Exception as e:
|
| 1128 |
logging.error("expansion_report: Error parsing container adjustment JSON: %s", e)
|
| 1129 |
continue
|
| 1130 |
|
| 1131 |
-
|
| 1132 |
-
|
| 1133 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1134 |
|
| 1135 |
-
corrections_summary.append(
|
| 1136 |
|
| 1137 |
-
# Step 4: Replace the original container with the updated version.
|
| 1138 |
container_tag.replace_with(BeautifulSoup(corrected_container, "html.parser"))
|
| 1139 |
logging.info("expansion_report: Updated container re-injected.")
|
| 1140 |
|
| 1141 |
updated_report_html = str(soup)
|
| 1142 |
|
| 1143 |
# Step 5 (and 6): Update the References Summary Table.
|
|
|
|
|
|
|
| 1144 |
prompt_refs = (
|
| 1145 |
f"\nYou are a technical editor.\n\n"
|
| 1146 |
"Review the following updated report HTML. If any new inline citations (e.g., [x]) have been added that are not in the original reference table, "
|
| 1147 |
-
"generate an updated References Summary Table as valid HTML."
|
| 1148 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1149 |
"Output only the updated table without any additional comments.\n\n"
|
| 1150 |
f"Updated Report HTML:\n{updated_report_html}"
|
| 1151 |
)
|
| 1152 |
-
# Increase token limit to ensure full output.
|
| 1153 |
updated_refs = llm_call(prompt=prompt_refs, model="o3-mini", temperature=0, max_tokens_param=10000)
|
| 1154 |
updated_refs = updated_refs.strip().strip("```").strip()
|
| 1155 |
|
|
|
|
| 1004 |
|
| 1005 |
# ============================================================================= Expand
|
| 1006 |
|
| 1007 |
+
def get_max_reference(report_html: str) -> int:
    """
    Return the highest reference number listed in the report's References Summary Table.

    The table is located by finding the first h1-h4 heading whose text contains
    "references summary table" (case insensitive) and inspecting the element
    that immediately follows it. Two layouts are recognised:

    * plain-text rows of the form ``"<number> | name | author | url"``;
    * a real HTML table, where the first cell of a row holds the number
      (optionally wrapped in square brackets, e.g. ``[3]``).

    Args:
        report_html: Full HTML of the report. May be empty.

    Returns:
        The largest reference number found, or 0 when the heading, the
        following block, or any numbered row is missing.
    """
    if not report_html:
        return 0

    soup_ = BeautifulSoup(report_html, "html.parser")

    # Locate a heading that includes "references summary table" (case insensitive).
    ref_heading = soup_.find(
        lambda tag: tag.name in ("h1", "h2", "h3", "h4")
        and "references summary table" in tag.get_text(strip=True).lower()
    )
    if ref_heading is None:
        return 0

    ref_block = ref_heading.find_next_sibling()
    if ref_block is None:
        return 0

    pipe_row = re.compile(r'\s*(\d+)\s*\|')
    numeric_cell = re.compile(r'\s*\[?(\d+)\]?\s*$')
    found_numbers = []

    # Layout 1: text rows that start with a number followed by a vertical bar.
    for line in ref_block.get_text(separator="\n").splitlines():
        m = pipe_row.match(line)
        if m:
            found_numbers.append(int(m.group(1)))

    # Layout 2: an HTML table. get_text() puts every cell on its own line, so
    # the pipe pattern above never matches; read the first cell of each row.
    for row in ref_block.find_all("tr"):
        first_cell = row.find(["td", "th"])
        if first_cell is None:
            continue
        m = numeric_cell.match(first_cell.get_text())
        if m:
            found_numbers.append(int(m.group(1)))

    return max(found_numbers, default=0)
|
| 1027 |
+
|
| 1028 |
def expand_report(expansion_request: str, openai_api_key: str, serpapi_api_key: str, report_html: str,
|
| 1029 |
initial_request: str, qa: str, target_style: str, knowledge_crumbs: str,
|
| 1030 |
complementary_guidance: str) -> (str, str):
|
|
|
|
| 1076 |
logging.warning("expansion_report: No unique strings were identified for adjustment. Returning original report.")
|
| 1077 |
return report_html, qa
|
| 1078 |
|
| 1079 |
+
# Determine the current maximum reference number in the report.
|
| 1080 |
+
current_max_ref = get_max_reference(report_html)
|
| 1081 |
+
logging.info(f"expansion_report: Current max reference number is {current_max_ref}")
|
| 1082 |
+
|
| 1083 |
+
# Prepare to accumulate newly added references across all expansions
|
| 1084 |
+
new_references_list = []
|
| 1085 |
+
|
| 1086 |
# Step 2: Parse the report HTML once.
|
| 1087 |
soup = BeautifulSoup(report_html, "html.parser")
|
| 1088 |
corrections_summary = []
|
|
|
|
| 1112 |
logging.info("expansion_report: Found container for unique string adjustment:\n\n%s\n", original_container_html)
|
| 1113 |
|
| 1114 |
# Step 3: Adjust the container by asking the LLM to expand the content.
|
| 1115 |
+
# The response is expected to include two parts separated by a line of ten hyphens:
|
| 1116 |
+
# - The expanded HTML snippet (to be reintegrated)
|
| 1117 |
+
# - New reference lines in the format: "# | name | author | url" (or blank if no new references)
|
| 1118 |
prompt_adjust = (f"""
|
| 1119 |
You are a technical editor.
|
| 1120 |
Given the following HTML container (including its outer tags) extracted from a larger report and based on the user expansion request, produce an expanded version by elaborating on the content.
|
| 1121 |
+
Preserve all inline citations (formatted as [x]) and, if you add any new citations, output them in a separate section.
|
| 1122 |
+
The output should be two parts separated by a single newline containing exactly ten hyphens (i.e., "----------").
|
| 1123 |
+
The first part is the expanded container’s full HTML (including its outer tags) to be reinserted as-is.
|
| 1124 |
+
The second part is a list (one per line) of any new references in the format:
|
| 1125 |
+
new reference number | name | author | url
|
| 1126 |
+
The new reference number should be the one you wish to assign for the new references.
|
| 1127 |
+
If no new references have been added, leave the second part blank.
|
| 1128 |
|
| 1129 |
- Overall Report HTML:
|
| 1130 |
{report_html}
|
|
|
|
| 1142 |
- Complementary Guidance:
|
| 1143 |
{complementary_guidance}
|
| 1144 |
|
| 1145 |
+
Ensure that any inline citation (e.g., [1], [2], etc.) within the expanded content is preserved or newly included so that they can all be captured in the final References Summary Table.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1146 |
|
| 1147 |
+
Output a JSON object with exactly one key "result" whose value is a string containing the two parts as specified (the expanded container, then a newline with "----------", then new reference lines).
|
| 1148 |
+
Only output valid JSON with no additional commentary or code fences.
|
| 1149 |
""")
|
| 1150 |
response_adjust = llm_call(prompt=prompt_adjust, model="o3-mini", temperature=0, max_tokens_param=10000)
|
| 1151 |
logging.info("expansion_report: Raw container adjustment response: %s", response_adjust)
|
| 1152 |
try:
|
| 1153 |
response_adjust = response_adjust.strip().strip("json").strip("```").strip()
|
| 1154 |
logging.info("Cleaned container adjustment response: %s", response_adjust)
|
| 1155 |
+
|
| 1156 |
+
# Parse the response: Split into two parts using the separator line "----------"
|
| 1157 |
+
parts = response_adjust.split("\n----------\n")
|
| 1158 |
+
if len(parts) == 2:
|
| 1159 |
+
corrected_container = parts[0].strip()
|
| 1160 |
+
new_refs_str = parts[1].strip()
|
| 1161 |
+
else:
|
| 1162 |
+
# Fallback in case no separator was found.
|
| 1163 |
+
corrected_container = response_adjust
|
| 1164 |
+
new_refs_str = ""
|
| 1165 |
except Exception as e:
|
| 1166 |
logging.error("expansion_report: Error parsing container adjustment JSON: %s", e)
|
| 1167 |
continue
|
| 1168 |
|
| 1169 |
+
# If new references exist, process and update their reference numbers.
|
| 1170 |
+
if new_refs_str:
|
| 1171 |
+
for line in new_refs_str.splitlines():
|
| 1172 |
+
line = line.strip()
|
| 1173 |
+
if line:
|
| 1174 |
+
ref_parts = line.split("|")
|
| 1175 |
+
if len(ref_parts) >= 4:
|
| 1176 |
+
# Reassign a new reference number by incrementing current_max_ref.
|
| 1177 |
+
current_max_ref += 1
|
| 1178 |
+
ref_name = ref_parts[1].strip()
|
| 1179 |
+
ref_author = ref_parts[2].strip()
|
| 1180 |
+
ref_url = ref_parts[3].strip()
|
| 1181 |
+
new_ref_line = f"{current_max_ref} | {ref_name} | {ref_author} | {ref_url}"
|
| 1182 |
+
new_references_list.append(new_ref_line)
|
| 1183 |
+
logging.info("expansion_report: Added new reference: %s", new_ref_line)
|
| 1184 |
+
else:
|
| 1185 |
+
logging.info("expansion_report: No new references found for this container.")
|
| 1186 |
|
| 1187 |
+
corrections_summary.append("Container expanded and references updated if applicable.")
|
| 1188 |
|
| 1189 |
+
# Step 4: Replace the original container with the updated version (only the expanded snippet).
|
| 1190 |
container_tag.replace_with(BeautifulSoup(corrected_container, "html.parser"))
|
| 1191 |
logging.info("expansion_report: Updated container re-injected.")
|
| 1192 |
|
| 1193 |
updated_report_html = str(soup)
|
| 1194 |
|
| 1195 |
# Step 5 (and 6): Update the References Summary Table.
|
| 1196 |
+
# Prepare new references text, if any.
|
| 1197 |
+
new_refs_text = "\n".join(new_references_list) if new_references_list else ""
|
| 1198 |
prompt_refs = (
|
| 1199 |
f"\nYou are a technical editor.\n\n"
|
| 1200 |
"Review the following updated report HTML. If any new inline citations (e.g., [x]) have been added that are not in the original reference table, "
|
| 1201 |
+
"generate an updated References Summary Table as valid HTML.\n"
|
| 1202 |
+
"Every inline citation found in the report must have a corresponding entry in this table.\n"
|
| 1203 |
+
"Use the following details for new references:\n"
|
| 1204 |
+
f"Current max reference number: {current_max_ref}\n"
|
| 1205 |
+
"New References (format: number | name | author | url):\n"
|
| 1206 |
+
f"{new_refs_text if new_refs_text else 'None'}\n\n"
|
| 1207 |
"Output only the updated table without any additional comments.\n\n"
|
| 1208 |
f"Updated Report HTML:\n{updated_report_html}"
|
| 1209 |
)
|
|
|
|
| 1210 |
updated_refs = llm_call(prompt=prompt_refs, model="o3-mini", temperature=0, max_tokens_param=10000)
|
| 1211 |
updated_refs = updated_refs.strip().strip("```").strip()
|
| 1212 |
|