Spaces:
Sleeping
Sleeping
| from groq import Groq | |
| import os | |
| import re | |
| import tempfile | |
| from datetime import datetime | |
| from fpdf import FPDF | |
# System message sent with every report request. It fixes the six-section
# layout of the generated report and tells the model not to add facts that
# are absent from the supplied research.
SYSTEM_PROMPT = """You are a professional research report writer.
Given raw research findings, produce a structured report with the following sections:
1. **Overview** — 2-3 sentence summary of the topic
2. **Key Findings** — 4-6 bullet points of the most important facts
3. **Background & Context** — 1-2 paragraphs of background
4. **Current Developments** — What's happening now / recent trends
5. **Key Players / Sources** — Notable names, organizations, or URLs mentioned
6. **Summary** — 2-3 sentence conclusion
Use clear headings. Be factual and concise. Do not make up information not present in the research.
"""
def format_report_web(topic: str, raw_research: str) -> str:
    """Format raw research into a clean Markdown report (no Rich formatting).

    Sends ``topic`` and ``raw_research`` to the Groq chat-completions API with
    ``SYSTEM_PROMPT`` as the system message, then prepends a Markdown header
    carrying the topic and the generation timestamp.

    Args:
        topic: Subject of the report; interpolated into the prompt and header.
        raw_research: Unstructured research text the model should organise.

    Returns:
        The Markdown report: the header followed by the model's text with
        leading whitespace removed. The body is empty when the model returns
        no text.

    Raises:
        KeyError: If the ``GROQ_API_KEY`` environment variable is not set.
    """
    client = Groq(api_key=os.environ["GROQ_API_KEY"])
    response = client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        temperature=0,
        stream=False,
        messages=[
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": f"Topic: {topic}\n\nRaw Research:\n{raw_research}"},
        ],
    )

    # The message content may be None when the model returns no text; fall
    # back to an empty body instead of failing on ``None.lstrip()``.
    report_body = response.choices[0].message.content or ""

    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M")

    # Clean Markdown header without excessive spacing.
    # NOTE(review): with no blank line before "---", CommonMark renderers read
    # the metadata line as a setext heading rather than a paragraph followed
    # by a rule -- confirm this is the intended look.
    header = (
        f"# 📊 Research Report\n"
        f"**Topic:** {topic} | **Generated:** {timestamp}\n"
        f"---\n\n"
    )
    return header + report_body.lstrip()
| # Matches a line that is ENTIRELY a bold heading, e.g. **Overview** or **Key Findings** | |
| _BOLD_HEADING_RE = re.compile(r'^\*\*([^*:]+)\*\*$') | |
| # Matches inline **bold** spans | |
| _INLINE_BOLD_RE = re.compile(r'\*\*(.*?)\*\*') | |
# Typographic punctuation that the latin-1 core PDF fonts cannot encode,
# mapped to ASCII look-alikes so it is not silently dropped from the text.
_PDF_CHAR_MAP = str.maketrans({
    "\u2018": "'",
    "\u2019": "'",
    "\u201c": '"',
    "\u201d": '"',
    "\u2013": "-",
    "\u2014": "-",
    "\u2026": "...",
    "\u2022": "-",
})

# A bold heading at the start of a line that runs straight into a bullet,
# e.g. "**Key Findings**- item one". Anchoring to the line start keeps inline
# bold inside a bullet ("- **Name** - description") from being split.
_FUSED_HEADING_RE = re.compile(
    r'^([ \t]*\*\*[^*\n]+\*\*)[ \t]*([-*\u2022][ \t])', re.MULTILINE
)

# Upper bound on the topic part of the file name, to stay well below common
# file-system name limits.
_MAX_TOPIC_CHARS = 80


def _pdf_safe(text: str) -> str:
    """Return ``text`` reduced to characters encodable as latin-1."""
    text = text.translate(_PDF_CHAR_MAP)
    # Whatever is still outside latin-1 (emoji etc.) is dropped; the strip
    # removes the stray space a dropped leading/trailing symbol leaves behind.
    return text.encode("latin-1", errors="ignore").decode("latin-1").strip()


def _report_filename(topic: str) -> str:
    """Build the PDF file name from ``topic`` using only [A-Za-z0-9_]."""
    words = re.sub(r'[^a-zA-Z0-9\s]', '', topic).split()
    clean_topic = "_".join(words)[:_MAX_TOPIC_CHARS].rstrip("_") or "Research"
    return f"{clean_topic}_Research_Report.pdf"


def save_report_web(topic: str, report: str) -> str:
    """Save report as a PDF in the system temporary directory.

    The Markdown in ``report`` is rendered line by line. Recognised forms are
    ``#``/``##``/``###`` headings, ``---`` rules, ``-``/``*``/bullet-character
    list items, standalone ``**bold**`` headings, lines starting with bold
    (rendered as small italic metadata), blank lines, and plain paragraphs.
    Inline bold markers are removed rather than rendered.

    Args:
        topic: Report topic; used only to derive the file name.
        report: Markdown text of the report.

    Returns:
        Path of the written PDF inside ``tempfile.gettempdir()``.
    """
    filepath = os.path.join(tempfile.gettempdir(), _report_filename(topic))

    pdf = FPDF()
    pdf.set_auto_page_break(auto=True, margin=15)
    # Set the margins before starting the first page so that page is laid out
    # with them too, not with the library defaults.
    pdf.set_margins(20, 20, 20)
    pdf.add_page()

    def strip_bold(text: str) -> str:
        return _INLINE_BOLD_RE.sub(r'\1', text)

    # Pre-process: put a heading the LLM fused with its first bullet on its
    # own line, e.g. "**Key Findings**- item one".
    report = _FUSED_HEADING_RE.sub(r'\1\n\2', report)

    for line in report.splitlines():
        stripped = line.strip()
        heading_match = _BOLD_HEADING_RE.match(stripped)

        # H1 (# Title)
        if stripped.startswith("# "):
            pdf.set_font("Helvetica", "B", 20)
            pdf.set_text_color(20, 20, 20)
            pdf.multi_cell(0, 10, _pdf_safe(stripped[2:]))
            pdf.ln(3)

        # H2 (## Section)
        elif stripped.startswith("## "):
            pdf.set_font("Helvetica", "B", 14)
            pdf.set_text_color(40, 40, 120)
            pdf.ln(4)
            pdf.multi_cell(0, 8, _pdf_safe(stripped[3:]))
            pdf.ln(1)

        # H3 (### Section)
        elif stripped.startswith("### "):
            pdf.set_font("Helvetica", "B", 12)
            pdf.set_text_color(80, 80, 80)
            pdf.ln(2)
            pdf.multi_cell(0, 7, _pdf_safe(stripped[4:]))
            # Return the cursor to the left margin like every other branch;
            # fpdf2's multi_cell appears to leave x at the right edge by
            # default -- TODO confirm against the installed fpdf version.
            pdf.ln(1)

        # Horizontal rule (---), drawn from margin to margin
        elif stripped.startswith("---"):
            y = pdf.get_y()
            pdf.set_draw_color(180, 180, 180)
            pdf.line(pdf.l_margin, y, pdf.w - pdf.r_margin, y)
            pdf.ln(4)

        # Bullet point (-, *, or unicode bullet)
        elif stripped.startswith(("- ", "* ", "\u2022 ")):
            text = strip_bold(re.sub(r'^[-*\u2022]\s+', '', stripped))
            pdf.set_font("Helvetica", "", 11)
            pdf.set_text_color(30, 30, 30)
            pdf.set_x(pdf.l_margin + 5)  # indent bullets 5 units past the margin
            pdf.multi_cell(0, 6, _pdf_safe("- " + text))  # ascii dash is latin-1 safe
            pdf.ln(1)

        # Standalone bold heading from LLM: **Overview**, **Key Findings** etc.
        elif heading_match:
            pdf.set_font("Helvetica", "B", 13)
            pdf.set_text_color(40, 40, 120)
            pdf.ln(5)
            pdf.multi_cell(0, 8, _pdf_safe(heading_match.group(1)))
            pdf.ln(1)

        # Metadata line with mixed bold: **Topic:** ... | **Generated:** ...
        elif stripped.startswith("**"):
            # Exactly one space on each side of every pipe separator.
            text = re.sub(r'\s*\|\s*', ' | ', strip_bold(stripped))
            pdf.set_font("Helvetica", "I", 10)
            pdf.set_text_color(100, 100, 100)
            pdf.multi_cell(0, 6, _pdf_safe(text))
            pdf.ln(1)

        # Empty line
        elif stripped == "":
            pdf.ln(3)

        # Regular paragraph text
        else:
            pdf.set_font("Helvetica", "", 11)
            pdf.set_text_color(30, 30, 30)
            pdf.multi_cell(0, 6, _pdf_safe(strip_bold(stripped)))
            pdf.ln(1)

    pdf.output(filepath)
    return filepath