Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -212,6 +212,19 @@ Only output valid JSON."""
|
|
| 212 |
logging.warning("fine_tune_report: Could not locate a container for unique string: '%s'", uniq_str)
|
| 213 |
continue
|
| 214 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 215 |
original_container_html = str(container_tag)
|
| 216 |
logging.info("fine_tune_report: Found container for unique string adjustment:\n\n%s\n", original_container_html)
|
| 217 |
|
|
@@ -1481,7 +1494,7 @@ Keep all mentions of names, people/titles, dates, papers, reports, organisation/
|
|
| 1481 |
return final_summary.strip()
|
| 1482 |
|
| 1483 |
|
| 1484 |
-
def analyze_with_gpt4o(query: str, snippet: str, breadth: int, temperature: float = 0.7, max_tokens: int = 8000) -> dict:
|
| 1485 |
# If snippet is a callable, call it to get the string.
|
| 1486 |
if callable(snippet):
|
| 1487 |
snippet = snippet()
|
|
@@ -1504,6 +1517,12 @@ Analyze the following content from a query result:
|
|
| 1504 |
Research topic:
|
| 1505 |
{query}
|
| 1506 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1507 |
Instructions:
|
| 1508 |
1. Relevance: Determine if the content is relevant to the research topic. Answer with a single word: "yes" or "no".
|
| 1509 |
|
|
@@ -1528,6 +1547,8 @@ Source length: {snippet_words} words. You may produce a more detailed summary if
|
|
| 1528 |
|
| 1529 |
IMPORTANT: Format your response as a proper JSON object with these fields:
|
| 1530 |
- "relevant": "yes" or "no"
|
|
|
|
|
|
|
| 1531 |
- "summary": {{...your structured summary with all parts...}}
|
| 1532 |
- "followups": [array of follow-up queries]
|
| 1533 |
"""
|
|
@@ -2645,7 +2666,7 @@ def iterative_deep_research_gen(initial_query: str, reportstyle: str, breadth: i
|
|
| 2645 |
# 2) Extract structured data
|
| 2646 |
semantically_rich_snippet = extract_structured_insights(cleaned_html)
|
| 2647 |
# 3) Summarize with LLM
|
| 2648 |
-
analysis = analyze_with_gpt4o(initial_query, semantically_rich_snippet, breadth)
|
| 2649 |
|
| 2650 |
# Analyze the cleaned content with GPT-4o-mini
|
| 2651 |
cleaned_text = clean_content(raw_content) # Call the function to get a string.
|
|
|
|
| 212 |
logging.warning("fine_tune_report: Could not locate a container for unique string: '%s'", uniq_str)
|
| 213 |
continue
|
| 214 |
|
| 215 |
+
if "remove" in adjustment_request.lower():
|
| 216 |
+
# Attempt to extract target phrase from the removal instruction.
|
| 217 |
+
m = re.search(r'remove\s+(?:the\s+duplicate\s+)?mention\s+of\s+the\s+source:\s*(.+)', adjustment_request.lower())
|
| 218 |
+
if m:
|
| 219 |
+
target = m.group(1).strip()
|
| 220 |
+
text_lower = container_tag.get_text().lower()
|
| 221 |
+
if target in text_lower:
|
| 222 |
+
# Remove this container entirely.
|
| 223 |
+
container_tag.replace_with(BeautifulSoup("", "html.parser"))
|
| 224 |
+
logging.info("fine_tune_report: Removed section containing target '%s'", target)
|
| 225 |
+
corrections_summary.append(f"Section containing {target} removed as per request.")
|
| 226 |
+
continue # Skip further processing of this snippet.
|
| 227 |
+
|
| 228 |
original_container_html = str(container_tag)
|
| 229 |
logging.info("fine_tune_report: Found container for unique string adjustment:\n\n%s\n", original_container_html)
|
| 230 |
|
|
|
|
| 1494 |
return final_summary.strip()
|
| 1495 |
|
| 1496 |
|
| 1497 |
+
def analyze_with_gpt4o(query: str, snippet: str, breadth: int, temperature: float = 0.7, max_tokens: int = 8000, url: str, title: str) -> dict:
|
| 1498 |
# If snippet is a callable, call it to get the string.
|
| 1499 |
if callable(snippet):
|
| 1500 |
snippet = snippet()
|
|
|
|
| 1517 |
Research topic:
|
| 1518 |
{query}
|
| 1519 |
|
| 1520 |
+
url:
|
| 1521 |
+
{url}
|
| 1522 |
+
|
| 1523 |
+
title:
|
| 1524 |
+
{title}
|
| 1525 |
+
|
| 1526 |
Instructions:
|
| 1527 |
1. Relevance: Determine if the content is relevant to the research topic. Answer with a single word: "yes" or "no".
|
| 1528 |
|
|
|
|
| 1547 |
|
| 1548 |
IMPORTANT: Format your response as a proper JSON object with these fields:
|
| 1549 |
- "relevant": "yes" or "no"
|
| 1550 |
+
- url: full url
|
| 1551 |
+
- title: title
|
| 1552 |
- "summary": {{...your structured summary with all parts...}}
|
| 1553 |
- "followups": [array of follow-up queries]
|
| 1554 |
"""
|
|
|
|
| 2666 |
# 2) Extract structured data
|
| 2667 |
semantically_rich_snippet = extract_structured_insights(cleaned_html)
|
| 2668 |
# 3) Summarize with LLM
|
| 2669 |
+
analysis = analyze_with_gpt4o(initial_query, semantically_rich_snippet, breadth, url, title)
|
| 2670 |
|
| 2671 |
# Analyze the cleaned content with GPT-4o-mini
|
| 2672 |
cleaned_text = clean_content(raw_content) # Call the function to get a string.
|