Spaces:

Tulika2000
/

Research-Agent

Sleeping

App Files Files Community

Research-Agent / report_web.py

Tulika2000

Update report_web.py

c5f00ef verified about 1 month ago

raw

history blame contribute delete

5.63 kB

	from groq import Groq
	import os
	import re
	import tempfile
	from datetime import datetime
	from fpdf import FPDF

	# System message for the report-writing model: it fixes the six-section
	# layout of the report and tells the model to stay within the supplied
	# research. Sent verbatim as the "system" role in format_report_web.
	SYSTEM_PROMPT = """You are a professional research report writer.
	Given raw research findings, produce a structured report with the following sections:

	1. Overview — 2-3 sentence summary of the topic
	2. Key Findings — 4-6 bullet points of the most important facts
	3. Background & Context — 1-2 paragraphs of background
	4. Current Developments — What's happening now / recent trends
	5. Key Players / Sources — Notable names, organizations, or URLs mentioned
	6. Summary — 2-3 sentence conclusion

	Use clear headings. Be factual and concise. Do not make up information not present in the research.
	"""


	def format_report_web(topic: str, raw_research: str) -> str:
	    """Format raw research into a clean Markdown report (no Rich formatting).

	    Sends ``topic`` and ``raw_research`` to the Groq chat-completions API
	    with ``SYSTEM_PROMPT`` as the system message, then prepends a Markdown
	    header carrying the topic and the generation timestamp.

	    Args:
	        topic: Subject of the report; echoed in the prompt and the header.
	        raw_research: Unstructured research notes for the model to organise.

	    Returns:
	        The Markdown report: header followed by the model's text with
	        leading whitespace removed.

	    Raises:
	        KeyError: If the ``GROQ_API_KEY`` environment variable is not set.
	    """
	    client = Groq(api_key=os.environ["GROQ_API_KEY"])

	    response = client.chat.completions.create(
	        model="llama-3.3-70b-versatile",
	        temperature=0,
	        stream=False,
	        messages=[
	            {"role": "system", "content": SYSTEM_PROMPT},
	            {
	                "role": "user",
	                "content": f"Topic: {topic}\n\nRaw Research:\n{raw_research}",
	            },
	        ],
	    )

	    # `content` can be None in OpenAI-style chat responses; fall back to an
	    # empty body instead of failing on .lstrip() below.
	    report_body = response.choices[0].message.content or ""
	    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M")

	    # Clean Markdown header without excessive spacing. The field labels are
	    # bold so the PDF renderer's "line starts with **" metadata branch picks
	    # this line up, and the separator is a plain pipe (a backslash before it
	    # is an invalid Python escape and would leak into the output).
	    header = (
	        "# 📊 Research Report\n"
	        f"**Topic:** {topic}  |  **Generated:** {timestamp}\n"
	        "---\n\n"
	    )

	    return header + report_body.lstrip()


	# Matches a line that is ENTIRELY a bold heading, e.g. Overview or Key Findings
	_BOLD_HEADING_RE = re.compile(r'^\\([^:]+)\\*$')
	# Matches inline bold spans
	_INLINE_BOLD_RE = re.compile(r'\\(.?)\\*')


	def save_report_web(topic: str, report: str) -> str:
	    """Save report as a PDF in the system temporary directory.

	    The Markdown in ``report`` is rendered line by line using a small
	    subset of the syntax: ``#``/``##``/``###`` headings, ``---`` rules,
	    list items introduced by ``-``, ``*`` or a bullet character, lines
	    matched by ``_BOLD_HEADING_RE``, a bold-led metadata line, blank
	    lines and plain paragraphs.

	    Args:
	        topic: Report subject; reduced to ASCII letters, digits and
	            underscores to build the file name.
	        report: Markdown text to render.

	    Returns:
	        Path of the written PDF, ``<topic>_Research_Report.pdf`` inside
	        ``tempfile.gettempdir()``.
	    """
	    # Keep only ASCII alphanumerics, turn every whitespace run (including
	    # tabs/newlines) into one underscore, and cap the length so a very long
	    # topic cannot exceed the file-system name limit.
	    clean_topic = re.sub(r'[^a-zA-Z0-9\s]', '', topic).strip()
	    clean_topic = re.sub(r'\s+', '_', clean_topic)[:80]
	    if not clean_topic:
	        clean_topic = "Research"

	    filename = f"{clean_topic}_Research_Report.pdf"
	    filepath = os.path.join(tempfile.gettempdir(), filename)

	    margin = 20
	    pdf = FPDF()
	    # Margins must be set before the first page is created, otherwise page 1
	    # starts at the library's default top margin.
	    pdf.set_margins(margin, margin, margin)
	    pdf.set_auto_page_break(auto=True, margin=15)
	    pdf.add_page()

	    # Common typographic characters that latin-1 cannot encode, mapped to
	    # ASCII look-alikes so they are not silently dropped from the text.
	    typographic = str.maketrans({
	        "\u2013": "-",
	        "\u2014": "-",
	        "\u2018": "'",
	        "\u2019": "'",
	        "\u201c": '"',
	        "\u201d": '"',
	        "\u2022": "-",
	        "\u2026": "...",
	    })

	    def safe(text: str) -> str:
	        """Transliterate typographic characters, then drop anything else outside latin-1."""
	        text = text.translate(typographic)
	        return text.encode("latin-1", errors="ignore").decode("latin-1")

	    def strip_bold(text: str) -> str:
	        """Remove the spans matched by _INLINE_BOLD_RE, keeping their inner text."""
	        return _INLINE_BOLD_RE.sub(r'\1', text)

	    # Pre-process: split "**Heading**content" onto separate lines.
	    # The LLM sometimes runs the section heading straight into the first bullet
	    # e.g. "**Key Findings**- item one" -> "**Key Findings**\n- item one".
	    # Anchored to the start of a line and requiring a space after the bullet
	    # marker, so inline bold inside a bullet or two adjacent bold spans are
	    # left alone.
	    report = re.sub(
	        r'^([ \t]*\*\*[^*\n]+\*\*)[ \t]*([-*\u2022][ \t])',
	        r'\1\n\2',
	        report,
	        flags=re.MULTILINE,
	    )

	    for line in report.splitlines():
	        stripped = line.strip()
	        heading_match = _BOLD_HEADING_RE.match(stripped)

	        # H1 (# Title)
	        if stripped.startswith("# "):
	            pdf.set_font("Helvetica", "B", 20)
	            pdf.set_text_color(20, 20, 20)
	            pdf.multi_cell(0, 10, safe(stripped[2:].strip()))
	            pdf.ln(3)

	        # H2 (## Section)
	        elif stripped.startswith("## "):
	            pdf.set_font("Helvetica", "B", 14)
	            pdf.set_text_color(40, 40, 120)
	            pdf.ln(4)
	            pdf.multi_cell(0, 8, safe(stripped[3:].strip()))
	            pdf.ln(1)

	        # H3 (### Section)
	        elif stripped.startswith("### "):
	            pdf.set_font("Helvetica", "B", 12)
	            pdf.set_text_color(80, 80, 80)
	            pdf.ln(2)
	            pdf.multi_cell(0, 7, safe(stripped[4:].strip()))
	            # Like every other text branch, finish with ln() so the cursor is
	            # back at the left margin (fpdf2's multi_cell otherwise leaves x
	            # at the right edge of the cell).
	            pdf.ln(1)

	        # Horizontal rule (---)
	        elif stripped.startswith("---"):
	            pdf.set_draw_color(180, 180, 180)
	            pdf.line(margin, pdf.get_y(), pdf.w - margin, pdf.get_y())
	            pdf.ln(4)

	        # Bullet point (-, *, or unicode bullet)
	        elif stripped.startswith(("- ", "* ", "\u2022 ")):
	            text = re.sub(r'^[-*\u2022]\s+', '', stripped)
	            text = strip_bold(text)
	            pdf.set_font("Helvetica", "", 11)
	            pdf.set_text_color(30, 30, 30)
	            pdf.set_x(margin + 5)  # indent list items past the left margin
	            pdf.multi_cell(0, 6, safe("- " + text))  # ascii dash is safe in latin-1
	            pdf.ln(1)

	        # Standalone bold heading from the LLM (a line matched by _BOLD_HEADING_RE)
	        elif heading_match:
	            heading = heading_match.group(1).strip()
	            pdf.set_font("Helvetica", "B", 13)
	            pdf.set_text_color(40, 40, 120)
	            pdf.ln(5)
	            pdf.multi_cell(0, 8, safe(heading))
	            pdf.ln(1)

	        # Metadata line with mixed bold: **Topic:** ... | **Generated:** ...
	        elif stripped.startswith("**"):
	            text = strip_bold(stripped)
	            # Normalise the field separator to " | " (a markdown-escaped pipe
	            # is accepted too) and collapse leftover runs of spaces.
	            text = re.sub(r'\s*\\?\|\s*', ' | ', text)
	            text = re.sub(r' {2,}', ' ', text)
	            pdf.set_font("Helvetica", "I", 10)
	            pdf.set_text_color(100, 100, 100)
	            pdf.multi_cell(0, 6, safe(text))
	            pdf.ln(1)

	        # Empty line
	        elif stripped == "":
	            pdf.ln(3)

	        # Regular paragraph text
	        else:
	            text = strip_bold(stripped)
	            pdf.set_font("Helvetica", "", 11)
	            pdf.set_text_color(30, 30, 30)
	            pdf.multi_cell(0, 6, safe(text))
	            pdf.ln(1)

	    pdf.output(filepath)
	    return filepath