Guiyom committed on
Commit
e90e656
·
verified ·
1 Parent(s): d7c3042

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -2
app.py CHANGED
@@ -212,6 +212,19 @@ Only output valid JSON."""
212
  logging.warning("fine_tune_report: Could not locate a container for unique string: '%s'", uniq_str)
213
  continue
214
 
 
 
 
 
 
 
 
 
 
 
 
 
 
215
  original_container_html = str(container_tag)
216
  logging.info("fine_tune_report: Found container for unique string adjustment:\n\n%s\n", original_container_html)
217
 
@@ -1481,7 +1494,7 @@ Keep all mentions of names, people/titles, dates, papers, reports, organisation/
1481
  return final_summary.strip()
1482
 
1483
 
1484
- def analyze_with_gpt4o(query: str, snippet: str, breadth: int, temperature: float = 0.7, max_tokens: int = 8000) -> dict:
1485
  # If snippet is a callable, call it to get the string.
1486
  if callable(snippet):
1487
  snippet = snippet()
@@ -1504,6 +1517,12 @@ Analyze the following content from a query result:
1504
  Research topic:
1505
  {query}
1506
 
 
 
 
 
 
 
1507
  Instructions:
1508
  1. Relevance: Determine if the content is relevant to the research topic. Answer with a single word: "yes" or "no".
1509
 
@@ -1528,6 +1547,8 @@ Source length: {snippet_words} words. You may produce a more detailed summary if
1528
 
1529
  IMPORTANT: Format your response as a proper JSON object with these fields:
1530
  - "relevant": "yes" or "no"
 
 
1531
  - "summary": {{...your structured summary with all parts...}}
1532
  - "followups": [array of follow-up queries]
1533
  """
@@ -2645,7 +2666,7 @@ def iterative_deep_research_gen(initial_query: str, reportstyle: str, breadth: i
2645
  # 2) Extract structured data
2646
  semantically_rich_snippet = extract_structured_insights(cleaned_html)
2647
  # 3) Summarize with LLM
2648
- analysis = analyze_with_gpt4o(initial_query, semantically_rich_snippet, breadth)
2649
 
2650
  # Analyze the cleaned content with GPT-4o-mini
2651
  cleaned_text = clean_content(raw_content) # Call the function to get a string.
 
212
  logging.warning("fine_tune_report: Could not locate a container for unique string: '%s'", uniq_str)
213
  continue
214
 
215
+ if "remove" in adjustment_request.lower():
216
+ # Attempt to extract target phrase from the removal instruction.
217
+ m = re.search(r'remove\s+(?:the\s+duplicate\s+)?mention\s+of\s+the\s+source:\s*(.+)', adjustment_request.lower())
218
+ if m:
219
+ target = m.group(1).strip()
220
+ text_lower = container_tag.get_text().lower()
221
+ if target in text_lower:
222
+ # Remove this container entirely.
223
+ container_tag.replace_with(BeautifulSoup("", "html.parser"))
224
+ logging.info("fine_tune_report: Removed section containing target '%s'", target)
225
+ corrections_summary.append(f"Section containing {target} removed as per request.")
226
+ continue # Skip further processing of this snippet.
227
+
228
  original_container_html = str(container_tag)
229
  logging.info("fine_tune_report: Found container for unique string adjustment:\n\n%s\n", original_container_html)
230
 
 
1494
  return final_summary.strip()
1495
 
1496
 
1497
+ def analyze_with_gpt4o(query: str, snippet: str, breadth: int, temperature: float = 0.7, max_tokens: int = 8000, url: str, title: str) -> dict:
1498
  # If snippet is a callable, call it to get the string.
1499
  if callable(snippet):
1500
  snippet = snippet()
 
1517
  Research topic:
1518
  {query}
1519
 
1520
+ url:
1521
+ {url}
1522
+
1523
+ title:
1524
+ {title}
1525
+
1526
  Instructions:
1527
  1. Relevance: Determine if the content is relevant to the research topic. Answer with a single word: "yes" or "no".
1528
 
 
1547
 
1548
  IMPORTANT: Format your response as a proper JSON object with these fields:
1549
  - "relevant": "yes" or "no"
1550
+ - url: full url
1551
+ - title: title
1552
  - "summary": {{...your structured summary with all parts...}}
1553
  - "followups": [array of follow-up queries]
1554
  """
 
2666
  # 2) Extract structured data
2667
  semantically_rich_snippet = extract_structured_insights(cleaned_html)
2668
  # 3) Summarize with LLM
2669
+ analysis = analyze_with_gpt4o(initial_query, semantically_rich_snippet, breadth, url, title)
2670
 
2671
  # Analyze the cleaned content with GPT-4o-mini
2672
  cleaned_text = clean_content(raw_content) # Call the function to get a string.