Spaces:

10gen
/

deepsearchitv2

Running

App Files Community

Guiyom committed on Mar 4, 2025

Commit

e90e656

verified ·

1 Parent(s): d7c3042

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -2

app.py CHANGED Viewed

@@ -212,6 +212,19 @@ Only output valid JSON."""
             logging.warning("fine_tune_report: Could not locate a container for unique string: '%s'", uniq_str)
             continue
         original_container_html = str(container_tag)
         logging.info("fine_tune_report: Found container for unique string adjustment:\n\n%s\n", original_container_html)
@@ -1481,7 +1494,7 @@ Keep all mentions of names, people/titles, dates, papers, reports, organisation/
     return final_summary.strip()
-def analyze_with_gpt4o(query: str, snippet: str, breadth: int, temperature: float = 0.7, max_tokens: int = 8000) -> dict:
     # If snippet is a callable, call it to get the string.
     if callable(snippet):
         snippet = snippet()
@@ -1504,6 +1517,12 @@ Analyze the following content from a query result:
 Research topic:
 {query}
 Instructions:
 1.  Relevance: Determine if the content is relevant to the research topic. Answer with a single word: "yes" or "no".
@@ -1528,6 +1547,8 @@ Source length: {snippet_words} words. You may produce a more detailed summary if
 IMPORTANT: Format your response as a proper JSON object with these fields:
 - "relevant": "yes" or "no"
 - "summary": {{...your structured summary with all parts...}}
 - "followups": [array of follow-up queries]
 """
@@ -2645,7 +2666,7 @@ def iterative_deep_research_gen(initial_query: str, reportstyle: str, breadth: i
                 # 2) Extract structured data
                 semantically_rich_snippet = extract_structured_insights(cleaned_html)
                 # 3) Summarize with LLM
-                analysis = analyze_with_gpt4o(initial_query, semantically_rich_snippet, breadth)
                 # Analyze the cleaned content with GPT-4o-mini
                 cleaned_text = clean_content(raw_content)  # Call the function to get a string.

             logging.warning("fine_tune_report: Could not locate a container for unique string: '%s'", uniq_str)
             continue
+        if "remove" in adjustment_request.lower():
+            # Attempt to extract target phrase from the removal instruction.
+            m = re.search(r'remove\s+(?:the\s+duplicate\s+)?mention\s+of\s+the\s+source:\s*(.+)', adjustment_request.lower())
+            if m:
+                target = m.group(1).strip()
+                text_lower = container_tag.get_text().lower()
+                if target in text_lower:
+                    # Remove this container entirely.
+                    container_tag.replace_with(BeautifulSoup("", "html.parser"))
+                    logging.info("fine_tune_report: Removed section containing target '%s'", target)
+                    corrections_summary.append(f"Section containing {target} removed as per request.")
+                    continue  # Skip further processing of this snippet.
         original_container_html = str(container_tag)
         logging.info("fine_tune_report: Found container for unique string adjustment:\n\n%s\n", original_container_html)
     return final_summary.strip()
+def analyze_with_gpt4o(query: str, snippet: str, breadth: int, temperature: float = 0.7, max_tokens: int = 8000, url: str, title: str) -> dict:
     # If snippet is a callable, call it to get the string.
     if callable(snippet):
         snippet = snippet()
 Research topic:
 {query}
+url:
+{url}
+title:
+{title}
 Instructions:
 1.  Relevance: Determine if the content is relevant to the research topic. Answer with a single word: "yes" or "no".
 IMPORTANT: Format your response as a proper JSON object with these fields:
 - "relevant": "yes" or "no"
+- url: full url
+- title: title
 - "summary": {{...your structured summary with all parts...}}
 - "followups": [array of follow-up queries]
 """
                 # 2) Extract structured data
                 semantically_rich_snippet = extract_structured_insights(cleaned_html)
                 # 3) Summarize with LLM
+                analysis = analyze_with_gpt4o(initial_query, semantically_rich_snippet, breadth, url, title)
                 # Analyze the cleaned content with GPT-4o-mini
                 cleaned_text = clean_content(raw_content)  # Call the function to get a string.