Guiyom committed on
Commit
db66925
·
verified ·
1 Parent(s): 3844fc5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -21
app.py CHANGED
@@ -96,30 +96,46 @@ def clean_llm_response(response: str) -> str:
96
 
97
  def expand_snippet_area(full_html: str, snippet: str) -> str:
98
  """
99
- Given the full HTML and a snippet of text, this function finds the element containing the snippet
100
- and traverses upward through the DOM to identify the first container tag that matches the allowed list.
101
- Allowed container tags include div, table, and iframe.
102
- The traversal stops as soon as a parent in the allowed list is found, or before reaching <body>.
103
- Returns the outer HTML of the found container.
104
  """
105
- allowed_tags = {"div", "table", "iframe"}
 
106
  soup = BeautifulSoup(full_html, "html.parser")
107
 
108
- # Find the tag containing the snippet
109
- candidate = soup.find(lambda tag: snippet in tag.get_text() if tag.get_text() else False)
110
-
111
- if candidate:
112
- current = candidate
113
- # Traverse upward to find the first matching container in the allowed list
114
- while current.parent is not None and current.parent.name.lower() != "body" and current.parent.name.lower() != "html":
115
- if current.parent.name.lower() in allowed_tags:
116
- current = current.parent
117
- break # Stop as soon as a match is found
118
- else:
119
- current = current.parent
120
- return str(current) # Return the outer HTML of the found container
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
  else:
122
- return snippet # If snippet not found, return the snippet itself
123
 
124
 
125
  def fine_tune_report(adjustment_request: str, openai_api_key: str, serpapi_api_key: str, report_html: str,
@@ -209,7 +225,7 @@ Only output valid JSON."""
209
  You are a technical editor.
210
  Given the following HTML container (with its outer tags) extracted from a larger report and based on the user adjustment request,
211
  produce a corrected version by making only the necessary changes. Preserve inline citations, formatting, and context.
212
- The updated version will put back precisely in the same location.
213
 
214
  // Context
215
  - Overall Report HTML:
 
96
 
97
  def expand_snippet_area(full_html: str, snippet: str) -> str:
98
  """
99
+ Given the full HTML and a snippet of text, this function finds the element that contains the snippet.
100
+ It then uses an iterative while loop to traverse upward (from the immediate parent to the top)
101
+ until the highest level iframe is reached or (if no iframe is present) until a div or table is
102
+ encountered—the first allowed container (div or table) found is used. If neither an iframe nor
103
+ an allowed container is found, it returns the candidate element itself.
104
  """
105
+ # Define allowed container tags for the non-iframe case.
106
+ allowed_tags = {"div", "table"}
107
  soup = BeautifulSoup(full_html, "html.parser")
108
 
109
+ # Find the candidate element that contains the snippet.
110
+ candidate = soup.find(lambda tag: tag.get_text() and snippet in tag.get_text())
111
+ if not candidate:
112
+ return snippet
113
+
114
+ iframe_candidate = None
115
+ allowed_candidate = None
116
+
117
+ # Start with the candidate's direct parent.
118
+ current = candidate.parent
119
+ while current is not None and current.name.lower() != "body":
120
+ tag_name = current.name.lower()
121
+ if tag_name == "iframe":
122
+ # Whenever we find an iframe, update the iframe candidate.
123
+ iframe_candidate = current
124
+ elif tag_name in allowed_tags and allowed_candidate is None:
125
+ # Record the first encountered allowed container if no iframe was encountered yet.
126
+ allowed_candidate = current
127
+ # Move upward.
128
+ current = current.parent
129
+
130
+ # If an iframe was encountered, return the outermost iframe.
131
+ if iframe_candidate is not None:
132
+ return str(iframe_candidate)
133
+ # Otherwise, if a div or table was found, return that.
134
+ elif allowed_candidate is not None:
135
+ return str(allowed_candidate)
136
+ # Otherwise, return the candidate element itself.
137
  else:
138
+ return str(candidate)
139
 
140
 
141
  def fine_tune_report(adjustment_request: str, openai_api_key: str, serpapi_api_key: str, report_html: str,
 
225
  You are a technical editor.
226
  Given the following HTML container (with its outer tags) extracted from a larger report and based on the user adjustment request,
227
  produce a corrected version by making only the necessary changes. Preserve inline citations, formatting, and context.
228
+ The updated version will put back precisely in the same location, the output should have the same outer tags.
229
 
230
  // Context
231
  - Overall Report HTML: