Guiyom committed on
Commit
3844fc5
·
verified ·
1 Parent(s): d581dae

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -7
app.py CHANGED
@@ -96,25 +96,30 @@ def clean_llm_response(response: str) -> str:
96
 
97
  def expand_snippet_area(full_html: str, snippet: str) -> str:
98
  """
99
- Given the full HTML and a small snippet (e.g., containing a keyword),
100
- find the element in which that snippet appears and traverse upward through the DOM
101
- until a larger container is reached. Allowed container tags include div, table, iframe, and section.
102
- The traversal stops when the parent is no longer in the allowed list or before reaching <body>.
103
  Returns the outer HTML of the found container.
104
  """
105
  allowed_tags = {"div", "table", "iframe"}
106
  soup = BeautifulSoup(full_html, "html.parser")
 
 
107
  candidate = soup.find(lambda tag: snippet in tag.get_text() if tag.get_text() else False)
 
108
  if candidate:
109
  current = candidate
 
110
  while current.parent is not None and current.parent.name.lower() != "body" and current.parent.name.lower() != "html":
111
  if current.parent.name.lower() in allowed_tags:
112
  current = current.parent
 
113
  else:
114
- break
115
- return str(current)
116
  else:
117
- return snippet
118
 
119
 
120
  def fine_tune_report(adjustment_request: str, openai_api_key: str, serpapi_api_key: str, report_html: str,
 
96
 
97
  def expand_snippet_area(full_html: str, snippet: str) -> str:
98
  """
99
+ Given the full HTML and a snippet of text, this function finds the element containing the snippet
100
+ and traverses upward through the DOM to identify the first container tag that matches the allowed list.
101
+ Allowed container tags include div, table, and iframe.
102
+ The traversal stops as soon as a parent in the allowed list is found, or before reaching <body>.
103
  Returns the outer HTML of the found container.
104
  """
105
  allowed_tags = {"div", "table", "iframe"}
106
  soup = BeautifulSoup(full_html, "html.parser")
107
+
108
+ # Find the tag containing the snippet
109
  candidate = soup.find(lambda tag: snippet in tag.get_text() if tag.get_text() else False)
110
+
111
  if candidate:
112
  current = candidate
113
+ # Traverse upward to find the first matching container in the allowed list
114
  while current.parent is not None and current.parent.name.lower() != "body" and current.parent.name.lower() != "html":
115
  if current.parent.name.lower() in allowed_tags:
116
  current = current.parent
117
+ break # Stop as soon as a match is found
118
  else:
119
+ current = current.parent
120
+ return str(current) # Return the outer HTML of the found container
121
  else:
122
+ return snippet # If snippet not found, return the snippet itself
123
 
124
 
125
  def fine_tune_report(adjustment_request: str, openai_api_key: str, serpapi_api_key: str, report_html: str,