Spaces:

10gen
/

deepsearchitv2

Runtime error

App Files Files Community

Guiyom committed on Feb 24, 2025

Commit

5f22904

verified ·

1 Parent(s): df16717

Update app.py

Browse files

Files changed (1) hide show

app.py +106 -1

app.py CHANGED Viewed

@@ -33,6 +33,103 @@ TOTAL_SUMMARIZED_WORDS = 0
 # Helper functions for external APIs and PDF Processing
 # =============================================================================
 def generate_graph_snippet(placeholder_text: str, context: str, initial_query: str, crumbs: str) -> str:
     graph_examples = """
@@ -2327,7 +2424,8 @@ def main():
                     report_status = gr.Textbox(label="Report Status", interactive=False, lines=2, value="Click 'Generate Report' to create your PDF report.")
                     report_file = gr.File(label="Download Report", visible=False, interactive=False, file_types=[".pdf"])
                     generate_button = gr.Button("Generate Report")
         with gr.Accordion("6] Extra Context (Crumbs, Existing Report & Log, Processed Queries)", open=False):
             existing_report = gr.Textbox(label="Existing Report (if any)", placeholder="Paste previously generated report here...", lines=4)
             existing_log = gr.Textbox(label="Existing Process Log (if any)", placeholder="Paste previously generated log here...", lines=4)
@@ -2387,6 +2485,13 @@ def main():
             outputs=[report_status, report_file]
         )
         demo.launch()
 if __name__ == "__main__":

 # Helper functions for external APIs and PDF Processing
 # =============================================================================
+def fine_tune_report(report_html: str, initial_request: str, qa: str, target_style: str,
+                     knowledge_crumbs: str, complementary_guidance: str) -> (str, str):
+    """
+    Fine-tunes the report by identifying improvable chunks, processing each one,
+    and updating the report HTML with improved content. Also returns an updated
+    plain text summary of changes that can be appended to the Q&A textbox.
+    Inputs:
+      - report_html: The full HTML code of the current report (non-empty)
+      - initial_request: The original research query or instructions
+      - qa: The existing Q&A text (plain text)
+      - target_style: The desired style (e.g., "Academic style") to apply uniformly
+      - knowledge_crumbs: The aggregated knowledge nuggets extracted from the searches
+      - complementary_guidance: Any additional guidance from the search parameters
+    Returns:
+      - A tuple of two elements:
+          (final_report_html, updated_qa)
+        Where:
+          * final_report_html is the updated HTML report (with reinjected improved chunks)
+          * updated_qa is the original Q&A text with a plain text summary of improvements appended.
+    """
+    from bs4 import BeautifulSoup
+    import json
+    import logging
+    # Parse the existing report HTML
+    soup = BeautifulSoup(report_html, "html.parser")
+    # Try to find pre-marked improvable chunks.
+    # (For example, your report generator may wrap sections in <div class="improvable-chunk">...</div>)
+    chunks = soup.find_all("div", class_="improvable-chunk")
+    # If no marked chunks exist, as a fallback we group paragraphs into chunks.
+    if not chunks:
+        all_paragraphs = soup.find_all("p")
+        chunks = []
+        # Group every n paragraphs into a chunk (ensure at least 5 chunks)
+        group_size = max(1, len(all_paragraphs) // 10)
+        # Create new chunks and append them at the end of the body
+        for i in range(0, len(all_paragraphs), group_size):
+            new_div = soup.new_tag("div", **{"class": "improvable-chunk"})
+            for p in all_paragraphs[i:i+group_size]:
+                new_div.append(p.extract())
+            # Append the newly created chunk back to <body>
+            soup.body.append(new_div)
+            chunks.append(new_div)
+    improvements_summary = []  # to store plain text summary for each chunk
+    # Process each identified chunk sequentially.
+    for idx, chunk in enumerate(chunks, start=1):
+        original_chunk = str(chunk)
+        # Build a detailed prompt including all relevant inputs.
+        prompt = (
+            f"Improve the following report chunk to enhance clarity, incorporate additional knowledge, "
+            f"and ensure any citations are consistent. Replace any placeholders or visuals so that the text "
+            f"becomes more cohesive and well written, matching the target style.\n\n"
+            f"--- Chunk #{idx} Original Content ---\n{original_chunk}\n\n"
+            f"Initial Request: {initial_request}\n\n"
+            f"Clarification Q&A: {qa}\n\n"
+            f"Target Style: {target_style}\n\n"
+            f"Knowledge Crumbs: {knowledge_crumbs}\n\n"
+            f"Complementary Guidance: {complementary_guidance}\n\n"
+            f"Please output a JSON object with exactly two fields (no extra commentary):\n"
+            f'{{"improved": "<the improved chunk in valid HTML>", "summary": "<a brief summary of changes>"}}\n'
+            f"Do not include any markdown formatting or backticks."
+        )
+        # Call the model (using openai_call with a high token limit—for example, 5000 tokens)
+        result = openai_call(prompt, model="o3-mini", max_tokens_param=5000, temperature=0.5)
+        result = result.strip().strip("```")
+        try:
+            res_json = json.loads(result)
+            improved_chunk = res_json.get("improved")
+            chunk_summary = res_json.get("summary")
+            if improved_chunk and chunk_summary:
+                improvements_summary.append(f"Chunk {idx}: {chunk_summary}")
+                # Replace the old chunk with the improved HTML.
+                new_chunk = BeautifulSoup(improved_chunk, "html.parser")
+                chunk.replace_with(new_chunk)
+            else:
+                logging.error(f"Chunk {idx}: Incomplete JSON result: {result}")
+        except Exception as e:
+            logging.error(f"Error processing chunk {idx}: {e}. Raw result: {result}")
+    # Get the updated report HTML as a string.
+    final_report_html = str(soup)
+    # Create a plain text summary of improvements.
+    summary_text = "Summary of Fine-Tuning Improvements:\n" + "\n".join(improvements_summary)
+    # Append the summary (preceded by a separator) to the original QA text.
+    updated_qa = qa.strip() + "\n----------\n" + summary_text
+    return final_report_html, updated_qa
 def generate_graph_snippet(placeholder_text: str, context: str, initial_query: str, crumbs: str) -> str:
     graph_examples = """
                     report_status = gr.Textbox(label="Report Status", interactive=False, lines=2, value="Click 'Generate Report' to create your PDF report.")
                     report_file = gr.File(label="Download Report", visible=False, interactive=False, file_types=[".pdf"])
                     generate_button = gr.Button("Generate Report")
+                    fine_tune_button = gr.Button("AI Improve the Report")
         with gr.Accordion("6] Extra Context (Crumbs, Existing Report & Log, Processed Queries)", open=False):
             existing_report = gr.Textbox(label="Existing Report (if any)", placeholder="Paste previously generated report here...", lines=4)
             existing_log = gr.Textbox(label="Existing Process Log (if any)", placeholder="Paste previously generated log here...", lines=4)
             outputs=[report_status, report_file]
         )
+        fine_tune_button.click(
+            fn=fine_tune_report,
+            inputs=[final_report, research_query, clarification_text, reportstyle, crumbs_box, additional_clarifications],
+            outputs=[fine_tuned_report, clarification_text]
+        )
         demo.launch()
 if __name__ == "__main__":