# Research-Agent / report_web.py
# Tulika2000's picture
# Update report_web.py
# c5f00ef verified
from groq import Groq
import os
import re
import tempfile
from datetime import datetime
from fpdf import FPDF
# System message sent with every chat request in format_report_web(): it fixes
# the six-section layout of the report and instructs the model to stay within
# the supplied research rather than invent facts.
SYSTEM_PROMPT = """You are a professional research report writer.
Given raw research findings, produce a structured report with the following sections:
1. **Overview** — 2-3 sentence summary of the topic
2. **Key Findings** — 4-6 bullet points of the most important facts
3. **Background & Context** — 1-2 paragraphs of background
4. **Current Developments** — What's happening now / recent trends
5. **Key Players / Sources** — Notable names, organizations, or URLs mentioned
6. **Summary** — 2-3 sentence conclusion
Use clear headings. Be factual and concise. Do not make up information not present in the research.
"""
def format_report_web(topic: str, raw_research: str) -> str:
    """Format raw research into a clean Markdown report (no Rich formatting).

    Sends the topic and the raw findings to the Groq chat-completions API with
    ``SYSTEM_PROMPT`` as the system message, then prepends a short Markdown
    header carrying the topic and the local generation time.

    Args:
        topic: Research topic; shown in the header and sent to the model.
        raw_research: Unstructured findings the model should organise.

    Returns:
        The Markdown report: header followed by the model's text with leading
        whitespace removed. If the model returns no text, only the header is
        returned.

    Raises:
        KeyError: If the ``GROQ_API_KEY`` environment variable is not set.
    """
    client = Groq(api_key=os.environ["GROQ_API_KEY"])
    response = client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        temperature=0,  # deterministic output for the same research input
        stream=False,
        messages=[
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": f"Topic: {topic}\n\nRaw Research:\n{raw_research}"},
        ],
    )

    # The message content is optional in the SDK response (it can be None,
    # e.g. for an empty completion); fall back to "" so .lstrip() below cannot
    # raise AttributeError.
    report_body = response.choices[0].message.content or ""

    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M")

    # Clean Markdown header without excessive spacing.
    # NOTE(review): a text line directly followed by '---' is a setext heading
    # in Markdown, so renderers may show the metadata line as an H2 - confirm
    # this is the intended look before changing the spacing.
    header = (
        f"# 📊 Research Report\n"
        f"**Topic:** {topic}  |  **Generated:** {timestamp}\n"
        f"---\n\n"
    )
    return header + report_body.lstrip()
# Matches a line that is ENTIRELY a bold heading, e.g. **Overview** or **Key Findings**.
# The text between the markers may not contain '*' or ':', so a line such as
# "**Topic:**" is not treated as a heading. Group 1 is the heading text
# without the asterisks.
_BOLD_HEADING_RE = re.compile(r'^\*\*([^*:]+)\*\*$')
# Matches inline **bold** spans (non-greedy); group 1 is the text between the
# markers, which lets callers drop the asterisks with a r'\1' substitution.
_INLINE_BOLD_RE = re.compile(r'\*\*(.*?)\*\*')
# Typographic characters that LLM output commonly contains but that fall
# outside latin-1 (the range kept for the built-in PDF fonts). They are mapped
# to ASCII look-alikes so they are not silently deleted from the text.
_ASCII_FALLBACKS = str.maketrans({
    "\u2010": "-",    # hyphen
    "\u2011": "-",    # non-breaking hyphen
    "\u2013": "-",    # en dash
    "\u2014": "-",    # em dash
    "\u2212": "-",    # minus sign
    "\u2018": "'",    # left single quote
    "\u2019": "'",    # right single quote / apostrophe
    "\u201c": '"',    # left double quote
    "\u201d": '"',    # right double quote
    "\u2026": "...",  # ellipsis
    "\u2022": "-",    # bullet
    "\u2192": "->",   # right arrow
})

# Upper bound on the topic part of the file name, so a very long topic cannot
# exceed common file-name length limits.
_MAX_TOPIC_CHARS = 100


def _to_latin1(text: str) -> str:
    """Return *text* restricted to latin-1, transliterating common punctuation."""
    text = text.translate(_ASCII_FALLBACKS)
    # Anything still outside latin-1 (emojis, other scripts) is dropped; the
    # final strip removes the gap an emoji at either end leaves behind.
    return re.sub(r'[^\x00-\xFF]', '', text).strip()


def _topic_to_filename(topic: str) -> str:
    """Build the PDF file name for *topic* using only ``[A-Za-z0-9_]``."""
    kept = re.sub(r'[^a-zA-Z0-9\s]', '', topic).strip()
    # Collapse every whitespace run (including tabs and newlines) into "_".
    slug = re.sub(r'\s+', '_', kept)[:_MAX_TOPIC_CHARS].rstrip("_")
    if not slug:
        slug = "Research"
    return f"{slug}_Research_Report.pdf"


def save_report_web(topic: str, report: str) -> str:
    """Save report as a PDF in the system temporary directory.

    The Markdown-like ``report`` is rendered line by line. ``#`` / ``##`` /
    ``###`` headings, ``---`` rules, ``-`` / ``*`` / bullet list items,
    standalone ``**bold**`` headings, the ``**Topic:** ... | **Generated:** ...``
    metadata line, blank lines and plain paragraphs each get their own font,
    colour and spacing. Inline ``**bold**`` markers are removed.

    Args:
        topic: Research topic; its sanitised form prefixes the file name.
        report: Report text, e.g. the string returned by ``format_report_web``.

    Returns:
        Path of the PDF written inside ``tempfile.gettempdir()``.
    """
    filepath = os.path.join(tempfile.gettempdir(), _topic_to_filename(topic))

    pdf = FPDF()
    pdf.set_auto_page_break(auto=True, margin=15)
    # Margins are set BEFORE the first page is added: add_page() places the
    # cursor according to the margins in force at that moment, so setting
    # them afterwards leaves page one starting at the default top margin.
    pdf.set_margins(20, 20, 20)
    pdf.add_page()

    def plain(text: str) -> str:
        """Remove ``**`` markers around inline bold spans."""
        return _INLINE_BOLD_RE.sub(r'\1', text)

    def write(text: str, line_height: float, indent: float = 0) -> None:
        """Write one wrapped text block starting at the left margin (+ indent)."""
        # The x position is set explicitly every time because, depending on
        # the fpdf flavour installed, multi_cell() may leave the cursor at the
        # right edge, which would leave no width for the next block.
        pdf.set_x(pdf.l_margin + indent)
        pdf.multi_cell(0, line_height, _to_latin1(text))

    # Pre-process: split "**Heading**content" onto separate lines.
    # The LLM sometimes runs the section heading straight into the first bullet
    # e.g. "**Key Findings**- item one" -> "**Key Findings**\n- item one".
    # The match is anchored to the start of a line (optionally after "## " or
    # "1. ") so bold text in the middle of a bullet or sentence is left alone,
    # and "(?!\*)" keeps a following "**bold**" span from being mistaken for a
    # "*" bullet marker.
    report = re.sub(
        r'^([ \t]*(?:#{1,6}[ \t]+|\d+[.)][ \t]+)?\*\*[^*\n]+\*\*)'
        r'([ \t]*[-*\u2022](?!\*))',
        r'\1\n\2',
        report,
        flags=re.MULTILINE,
    )

    for line in report.splitlines():
        stripped = line.strip()
        bold_heading = _BOLD_HEADING_RE.match(stripped)

        # H1 (# Title)
        if stripped.startswith("# "):
            pdf.set_font("Helvetica", "B", 20)
            pdf.set_text_color(20, 20, 20)
            write(plain(stripped[2:]), 10)
            pdf.ln(3)

        # H2 (## Section)
        elif stripped.startswith("## "):
            pdf.set_font("Helvetica", "B", 14)
            pdf.set_text_color(40, 40, 120)
            pdf.ln(4)
            write(plain(stripped[3:]), 8)
            pdf.ln(1)

        # H3 (### Section)
        elif stripped.startswith("### "):
            pdf.set_font("Helvetica", "B", 12)
            pdf.set_text_color(80, 80, 80)
            pdf.ln(2)
            write(plain(stripped[4:]), 7)

        # Horizontal rule (---), drawn from margin to margin
        elif stripped.startswith("---"):
            y = pdf.get_y()
            pdf.set_draw_color(180, 180, 180)
            pdf.line(pdf.l_margin, y, pdf.w - pdf.r_margin, y)
            pdf.ln(4)

        # Bullet point (-, *, or unicode bullet)
        elif stripped.startswith(("- ", "* ", "\u2022 ")):
            item = re.sub(r'^[-*\u2022]\s+', '', stripped)
            pdf.set_font("Helvetica", "", 11)
            pdf.set_text_color(30, 30, 30)
            # ASCII dash as the bullet glyph: always available in latin-1.
            write("- " + plain(item), 6, indent=5)
            pdf.ln(1)

        # Standalone bold heading from LLM: **Overview**, **Key Findings** etc.
        elif bold_heading:
            pdf.set_font("Helvetica", "B", 13)
            pdf.set_text_color(40, 40, 120)
            pdf.ln(5)
            write(bold_heading.group(1), 8)
            pdf.ln(1)

        # Metadata line with mixed bold: **Topic:** ... | **Generated:** ...
        elif stripped.startswith("**"):
            meta = re.sub(r'\s+', ' ', plain(stripped))
            # Exactly one space on each side of every "|" separator.
            meta = re.sub(r'\s*\|\s*', ' | ', meta)
            pdf.set_font("Helvetica", "I", 10)
            pdf.set_text_color(100, 100, 100)
            write(meta, 6)
            pdf.ln(1)

        # Empty line
        elif not stripped:
            pdf.ln(3)

        # Regular paragraph text
        else:
            pdf.set_font("Helvetica", "", 11)
            pdf.set_text_color(30, 30, 30)
            write(plain(stripped), 6)
            pdf.ln(1)

    pdf.output(filepath)
    return filepath