Guiyom committed on
Commit
5f22904
·
verified ·
1 Parent(s): df16717

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +106 -1
app.py CHANGED
@@ -33,6 +33,103 @@ TOTAL_SUMMARIZED_WORDS = 0
33
  # Helper functions for external APIs and PDF Processing
34
  # =============================================================================
35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  def generate_graph_snippet(placeholder_text: str, context: str, initial_query: str, crumbs: str) -> str:
37
 
38
  graph_examples = """
@@ -2327,7 +2424,8 @@ def main():
2327
  report_status = gr.Textbox(label="Report Status", interactive=False, lines=2, value="Click 'Generate Report' to create your PDF report.")
2328
  report_file = gr.File(label="Download Report", visible=False, interactive=False, file_types=[".pdf"])
2329
  generate_button = gr.Button("Generate Report")
2330
-
 
2331
  with gr.Accordion("6] Extra Context (Crumbs, Existing Report & Log, Processed Queries)", open=False):
2332
  existing_report = gr.Textbox(label="Existing Report (if any)", placeholder="Paste previously generated report here...", lines=4)
2333
  existing_log = gr.Textbox(label="Existing Process Log (if any)", placeholder="Paste previously generated log here...", lines=4)
@@ -2387,6 +2485,13 @@ def main():
2387
  outputs=[report_status, report_file]
2388
  )
2389
 
 
 
 
 
 
 
 
2390
  demo.launch()
2391
 
2392
  if __name__ == "__main__":
 
33
  # Helper functions for external APIs and PDF Processing
34
  # =============================================================================
35
 
36
def _strip_code_fence(text: str) -> str:
    """Return *text* without a surrounding Markdown code fence, if any."""
    text = text.strip()
    if text.startswith("```"):
        text = text[3:]
        # An opening fence may carry a language tag on its own line
        # (e.g. "```json"); drop that line so only the payload remains.
        head, sep, tail = text.partition("\n")
        if sep and head.strip().isalnum():
            text = tail
    # Stray backticks (closing fence, inline code quoting) are never part of
    # a JSON object, so they can be removed from both ends safely.
    return text.strip().strip("`").strip()


def _parse_improvement_payload(raw: str):
    """Parse the model reply into a dict, or return None when it is not a JSON object."""
    import json

    cleaned = _strip_code_fence(raw)
    candidates = [cleaned]
    # Fallback: the model sometimes wraps the object in extra commentary;
    # retry on the outermost {...} span.
    start, end = cleaned.find("{"), cleaned.rfind("}")
    if 0 <= start < end:
        candidates.append(cleaned[start:end + 1])
    for candidate in candidates:
        try:
            payload = json.loads(candidate)
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            continue
        if isinstance(payload, dict):
            return payload
    return None


def fine_tune_report(report_html: str, initial_request: str, qa: str, target_style: str,
                     knowledge_crumbs: str, complementary_guidance: str) -> (str, str):
    """
    Fine-tune the report by improving it chunk by chunk with the language model.

    Chunks are the ``<div class="improvable-chunk">`` elements of the report.
    When the report has none, its ``<p>`` elements are grouped into wrapper
    divs that are inserted where the first paragraph of each group stood, so
    the document order around headings, tables and figures is preserved.

    Each chunk is sent to ``openai_call`` together with the request context.
    A chunk is replaced only when the reply is a JSON object with non-empty
    "improved" (HTML string) and "summary" fields; otherwise the chunk is left
    untouched and the problem is logged.

    Inputs:
      - report_html: The full HTML code of the current report
      - initial_request: The original research query or instructions
      - qa: The existing Q&A text (plain text)
      - target_style: The desired style (e.g., "Academic style") to apply uniformly
      - knowledge_crumbs: The aggregated knowledge nuggets extracted from the searches
      - complementary_guidance: Any additional guidance from the search parameters

    Returns:
      - A tuple of two elements: (final_report_html, updated_qa)
        * final_report_html is the updated HTML report (improved chunks re-injected)
        * updated_qa is the original Q&A text followed by a separator and a
          plain text summary listing the chunks that were actually replaced.
        When report_html is empty, the inputs are returned unchanged.
    """
    from bs4 import BeautifulSoup
    import logging

    # Nothing to parse or improve: hand the inputs back instead of crashing.
    if not report_html or not report_html.strip():
        logging.error("fine_tune_report called with an empty report; nothing to improve.")
        return report_html or "", qa or ""

    # Parse the existing report HTML
    soup = BeautifulSoup(report_html, "html.parser")

    # Try to find pre-marked improvable chunks.
    # (For example, the report generator may wrap sections in <div class="improvable-chunk">...</div>)
    chunks = list(soup.find_all("div", class_="improvable-chunk"))

    # If no marked chunks exist, fall back to grouping paragraphs into chunks.
    if not chunks:
        all_paragraphs = soup.find_all("p")
        # Integer division by 10 yields roughly ten or more chunks for long
        # reports and one paragraph per chunk for short ones.
        group_size = max(1, len(all_paragraphs) // 10)
        for i in range(0, len(all_paragraphs), group_size):
            group = all_paragraphs[i:i + group_size]
            new_div = soup.new_tag("div", **{"class": "improvable-chunk"})
            # Insert the wrapper in place rather than at the end of <body>:
            # this keeps the paragraphs near their original position and also
            # works for HTML fragments that have no <body> element.
            group[0].insert_before(new_div)
            for p in group:
                new_div.append(p.extract())
            chunks.append(new_div)

    improvements_summary = []  # plain text summary line for each replaced chunk

    # Process each identified chunk sequentially.
    for idx, chunk in enumerate(chunks, start=1):
        original_chunk = str(chunk)
        # Build a detailed prompt including all relevant inputs.
        prompt = (
            f"Improve the following report chunk to enhance clarity, incorporate additional knowledge, "
            f"and ensure any citations are consistent. Replace any placeholders or visuals so that the text "
            f"becomes more cohesive and well written, matching the target style.\n\n"
            f"--- Chunk #{idx} Original Content ---\n{original_chunk}\n\n"
            f"Initial Request: {initial_request}\n\n"
            f"Clarification Q&A: {qa}\n\n"
            f"Target Style: {target_style}\n\n"
            f"Knowledge Crumbs: {knowledge_crumbs}\n\n"
            f"Complementary Guidance: {complementary_guidance}\n\n"
            f"Please output a JSON object with exactly two fields (no extra commentary):\n"
            f'{{"improved": "<the improved chunk in valid HTML>", "summary": "<a brief summary of changes>"}}\n'
            f"Do not include any markdown formatting or backticks."
        )

        # Call the model (using openai_call with a high token limit—for example, 5000 tokens)
        result = openai_call(prompt, model="o3-mini", max_tokens_param=5000, temperature=0.5)

        payload = _parse_improvement_payload(result if isinstance(result, str) else "")
        if payload is None:
            logging.error("Error processing chunk %s: reply is not a JSON object. Raw result: %s", idx, result)
            continue

        improved_chunk = payload.get("improved")
        chunk_summary = payload.get("summary")
        if not (isinstance(improved_chunk, str) and improved_chunk and chunk_summary):
            logging.error("Chunk %s: Incomplete JSON result: %s", idx, result)
            continue

        try:
            # Replace the old chunk with the improved HTML.
            chunk.replace_with(BeautifulSoup(improved_chunk, "html.parser"))
        except Exception as e:
            # Best effort per chunk: a failed replacement (e.g. the chunk was
            # nested in an already-replaced chunk) must not abort the others.
            logging.error("Error processing chunk %s: %s. Raw result: %s", idx, e, result)
            continue

        # Record the summary only once the replacement actually happened.
        improvements_summary.append(f"Chunk {idx}: {chunk_summary}")

    # Get the updated report HTML as a string.
    final_report_html = str(soup)

    # Create a plain text summary of improvements.
    summary_text = "Summary of Fine-Tuning Improvements:\n" + "\n".join(improvements_summary)

    # Append the summary (preceded by a separator) to the original QA text.
    updated_qa = (qa or "").strip() + "\n----------\n" + summary_text

    return final_report_html, updated_qa
132
+
133
  def generate_graph_snippet(placeholder_text: str, context: str, initial_query: str, crumbs: str) -> str:
134
 
135
  graph_examples = """
 
2424
  report_status = gr.Textbox(label="Report Status", interactive=False, lines=2, value="Click 'Generate Report' to create your PDF report.")
2425
  report_file = gr.File(label="Download Report", visible=False, interactive=False, file_types=[".pdf"])
2426
  generate_button = gr.Button("Generate Report")
2427
+ fine_tune_button = gr.Button("AI Improve the Report")
2428
+
2429
  with gr.Accordion("6] Extra Context (Crumbs, Existing Report & Log, Processed Queries)", open=False):
2430
  existing_report = gr.Textbox(label="Existing Report (if any)", placeholder="Paste previously generated report here...", lines=4)
2431
  existing_log = gr.Textbox(label="Existing Process Log (if any)", placeholder="Paste previously generated log here...", lines=4)
 
2485
  outputs=[report_status, report_file]
2486
  )
2487
 
2488
+ fine_tune_button.click(
2489
+ fn=fine_tune_report,
2490
+ inputs=[final_report, research_query, clarification_text, reportstyle, crumbs_box, additional_clarifications],
2491
+ outputs=[fine_tuned_report, clarification_text]
2492
+ )
2493
+
2494
+
2495
  demo.launch()
2496
 
2497
  if __name__ == "__main__":