Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import os | |
| import sys | |
| import time | |
| import importlib | |
| import traceback | |
| import re | |
| from huggingface_hub import InferenceClient | |
# ── Token loading
# The Hugging Face API token is read once, at import time, from the
# environment. Only its presence is reported: no part of the secret is
# written to the log, because startup logs can end up shared or public.
HF_TOKEN = os.environ.get("HF_TOKEN")
if not HF_TOKEN:
    print("⚠️ WARNING: HF_TOKEN environment variable is not set!")
else:
    print("✅ HF_TOKEN loaded")
# ── SafeBarChart module
# Source text of a small helper module. It is written to /tmp at startup so
# that generated scripts can `from SafeBarChart import SafeBarChart`.
_safe_bar_code = '''
from reportlab.platypus import Flowable
from reportlab.lib.colors import HexColor, black


class SafeBarChart(Flowable):
    """Vertical bar chart flowable drawn directly on the canvas.

    data is an iterable of (label, value) pairs; values may be numbers or
    strings such as "$1,200" (commas and dollar signs are stripped).
    """

    def __init__(self, data, width=480, height=180, color="#4472C4"):
        super().__init__()
        self.chart_data = [(str(l), float(str(v).replace(",","").replace("$",""))) for l, v in data]
        self.width = width
        self.height = height
        self.color = color

    def wrap(self, availWidth, availHeight):
        """Report the fixed size requested at construction time."""
        return (self.width, self.height)

    def draw(self):
        """Draw one bar per data point with a value above and a label below."""
        n = len(self.chart_data)
        if n == 0:
            return
        peak = max(v for _, v in self.chart_data)
        # Avoid dividing by zero (or a negative peak) when scaling bars.
        max_val = peak if peak > 0 else 1
        chart_area_h = self.height - 40
        total_w = self.width - 20
        bar_w = max(20, (total_w / n) * 0.6)
        gap = max(5, (total_w / n) * 0.4)
        total_used = n * bar_w + (n - 1) * gap
        if total_used > total_w > 0:
            # The minimum bar/gap sizes no longer fit: shrink both together
            # so the chart never spills outside its own width.
            scale = total_w / total_used
            bar_w = bar_w * scale
            gap = gap * scale
            total_used = total_w
        x_offset = (self.width - total_used) / 2
        for i, (label, val) in enumerate(self.chart_data):
            # Keep a 4pt stub so zero/negative values remain visible.
            bar_h = max(4, (val / max_val) * chart_area_h)
            x = x_offset + i * (bar_w + gap)
            y = 25
            self.canv.setFillColor(HexColor(self.color))
            self.canv.rect(x, y, bar_w, bar_h, fill=1, stroke=0)
            self.canv.setFillColor(black)
            self.canv.setFont("Helvetica", 6)
            if val >= 1000000:
                short_val = "$" + str(round(val / 1000000, 1)) + "M"
            elif val >= 1000:
                short_val = "$" + str(int(val / 1000)) + "K"
            else:
                short_val = str(int(val))
            self.canv.drawCentredString(x + bar_w / 2, y + bar_h + 3, short_val)
            self.canv.drawCentredString(x + bar_w / 2, y - 10, label[:3])
'''
# ── CoverPage module
# Source text of a helper module defining draw_cover(canvas, doc, ...).
# draw_cover paints a white page with a dark blue (#1F3564) banner holding
# the company name, then the subtitle, the date and a horizontal rule,
# all centred on a letter-size page. The canvas state is saved before
# drawing and restored afterwards.
_cover_page_code = '''
from reportlab.lib.pagesizes import letter
from reportlab.lib.colors import HexColor, white
def draw_cover(canvas, doc, company="DataFlow Inc", subtitle="Monthly Business Report", date="May 2026"):
    w, h = letter
    canvas.saveState()
    canvas.resetTransforms()
    canvas.setFillColor(white)
    canvas.rect(0, 0, w, h, fill=1, stroke=0)
    canvas.setFillColor(HexColor("#1F3564"))
    canvas.rect(0, h - 120, w, 120, fill=1, stroke=0)
    canvas.setFillColor(white)
    canvas.setFont("Helvetica-Bold", 28)
    canvas.drawCentredString(w / 2, h - 75, company)
    canvas.setFillColor(HexColor("#1F3564"))
    canvas.setFont("Helvetica-Bold", 18)
    canvas.drawCentredString(w / 2, h - 200, subtitle)
    canvas.setFillColor(HexColor("#666666"))
    canvas.setFont("Helvetica", 13)
    canvas.drawCentredString(w / 2, h - 240, date)
    canvas.setStrokeColor(HexColor("#4472C4"))
    canvas.setLineWidth(2)
    canvas.line(w * 0.2, h - 270, w * 0.8, h - 270)
    canvas.restoreState()
'''
# ── Write modules and clear cache
# Each helper module is materialised as /tmp/<name>.py, any previously
# imported copy is dropped from the import cache, and /tmp is put at the
# front of the import path so the fresh files are the ones imported.
_helper_sources = {
    "SafeBarChart": _safe_bar_code,
    "CoverPage": _cover_page_code,
}
for _mod, _src in _helper_sources.items():
    with open("/tmp/" + _mod + ".py", "w") as _fh:
        _fh.write(_src)
for _mod in _helper_sources:
    sys.modules.pop(_mod, None)
if "/tmp" not in sys.path:
    sys.path.insert(0, "/tmp")

# ── Verify
# Import both helpers once at startup so a broken module fails fast here
# rather than inside a user request.
from SafeBarChart import SafeBarChart as _SBC
from CoverPage import draw_cover as _DC

assert callable(_DC), "draw_cover not callable"
_t = _SBC(data=[("Jan", 120000)], width=480, height=200)
assert _t.wrap(500, 500) == (480, 200), "SafeBarChart.wrap() broken"
print("✅ SafeBarChart + CoverPage modules registered and verified")
# ── Model client
# Single shared chat-completion client. HF_TOKEN may be None here; requests
# are refused before the client is used in that case.
_client_options = {
    "model": "Qwen/Qwen2.5-7B-Instruct",
    "token": HF_TOKEN,
}
client = InferenceClient(**_client_options)
# System message sent as the first chat message of every generation request.
# It fixes the output contract the rest of this file relies on: plain
# executable Python only, the document saved as /tmp/output_doc.<ext>, and a
# final "FILE:<path>" line naming the file. The remainder is a set of
# known-good usage patterns for reportlab, openpyxl, python-pptx and
# python-docx.
SYSTEM_PROMPT = """You are Document Generator, a specialist in creating professional documents programmatically.
Your personality: Precise, design-aware, format-savvy, detail-oriented.
## Libraries:
- PDF -> reportlab | Excel -> openpyxl | PowerPoint -> python-pptx | Word -> python-docx
## Document Quality Rules:
1. Consistent branding -- dark blue (#1F3564) primary, white text on dark backgrounds
2. Format all currency as $X,XXX -- never raw numbers like 120000
3. Use realistic professional sample data if user provides none
## EXECUTION RULES (mandatory):
1. Generate ONLY clean executable Python code -- no markdown fences, no prose, no comments
2. Keep functions short -- max 20 lines each
3. Use simple string concatenation -- NEVER f-strings
4. ALWAYS save to: /tmp/output_doc.pdf / .xlsx / .pptx / .docx
5. End response with exactly: FILE:/tmp/output_doc.<extension>
6. Return ONLY code + FILE: line. Nothing else.
7. Close ALL parentheses, brackets, and quotes properly.
8. NEVER include emoji or special unicode characters in code.
## reportlab PDF -- Platypus (multi-page docs):
from reportlab.lib.pagesizes import letter
from reportlab.lib import colors
from reportlab.lib.colors import HexColor, black
from reportlab.lib.units import inch
from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph, Spacer, PageBreak
from reportlab.lib.styles import getSampleStyleSheet
from SafeBarChart import SafeBarChart
from CoverPage import draw_cover
styles = getSampleStyleSheet()
def build_doc(filename, elements, company="DataFlow Inc", subtitle="Monthly Business Report", date="May 2026"):
    doc = SimpleDocTemplate(filename, pagesize=letter,
                            leftMargin=inch, rightMargin=inch,
                            topMargin=inch, bottomMargin=0.75*inch)
    def first_page(canvas, doc):
        draw_cover(canvas, doc, company=company, subtitle=subtitle, date=date)
    def later_pages(canvas, doc):
        canvas.saveState()
        canvas.setFont("Helvetica", 8)
        canvas.drawRightString(letter[0] - inch, 0.5*inch, "Page " + str(doc.page))
        canvas.restoreState()
    doc.build(elements, onFirstPage=first_page, onLaterPages=later_pages)
# Start elements with PageBreak() so content begins on page 2
elements = []
elements.append(PageBreak())
elements.append(Paragraph("Content here", styles["Normal"]))
## Bar Charts -- SafeBarChart only:
from SafeBarChart import SafeBarChart
chart_data = [('Jan', 120000), ('Feb', 150000)]
elements.append(SafeBarChart(data=chart_data, width=480, height=200))
# NEVER use VerticalBarChart or redefine SafeBarChart
## reportlab Table Styling:
table_style = TableStyle([
    ('BACKGROUND', (0,0), (-1,0), HexColor('#1F3564')),
    ('TEXTCOLOR', (0,0), (-1,0), colors.white),
    ('FONTNAME', (0,0), (-1,0), 'Helvetica-Bold'),
    ('FONTSIZE', (0,0), (-1,0), 11),
    ('ALIGN', (0,0), (-1,-1), 'CENTER'),
    ('ROWBACKGROUNDS', (0,1), (-1,-1), [HexColor('#EBF0FA'), colors.white]),
    ('GRID', (0,0), (-1,-1), 0.5, HexColor('#CCCCCC')),
    ('FONTNAME', (0,1), (-1,-1), 'Helvetica'),
    ('FONTSIZE', (0,1), (-1,-1), 10),
    ('TOPPADDING', (0,0), (-1,-1), 6),
    ('BOTTOMPADDING', (0,0), (-1,-1), 6),
])
t = Table(data, colWidths=[2.5*inch, 2.5*inch])
t.setStyle(table_style)
## Canvas-only (simple single-page ONLY):
from reportlab.pdfgen import canvas as pdfcanvas
c = pdfcanvas.Canvas("/tmp/output_doc.pdf", pagesize=letter)
c.drawString(100, 700, "Hello")
c.showPage()
c.save()
## openpyxl Rules:
from openpyxl import Workbook
from openpyxl.styles import Font, PatternFill, Alignment
wb = Workbook()
ws = wb.active
# Write headers first, then data rows
headers = ['Name', 'Department', 'Salary']
ws.append(headers)
# Style header row
for cell in ws[1]:
    cell.fill = PatternFill("solid", fgColor="1F3564")
    cell.font = Font(bold=True, color="FFFFFF")
    cell.alignment = Alignment(horizontal="center")
# Freeze top row
ws.freeze_panes = 'A2'
# Safe auto-width -- ALWAYS use this exact loop, never use max() on column:
for col in ws.columns:
    max_len = 0
    col_letter = col[0].column_letter
    for cell in col:
        try:
            cell_len = len(str(cell.value)) if cell.value is not None else 0
            if cell_len > max_len:
                max_len = cell_len
        except:
            pass
    ws.column_dimensions[col_letter].width = max_len + 4
# ALWAYS save last:
wb.save("/tmp/output_doc.xlsx")
## python-pptx Rules:
from pptx import Presentation
from pptx.util import Inches, Pt
from pptx.dml.color import RGBColor # RGBColor is from pptx.dml.color NOT pptx.util
prs = Presentation()
prs.slide_width = Inches(13.33)
prs.slide_height = Inches(7.5)
# ALWAYS use blank layout index 6 -- NEVER layouts[0] through [5]
slide = prs.slides.add_slide(prs.slide_layouts[6])
# Set background
fill = slide.background.fill
fill.solid()
fill.fore_color.rgb = RGBColor(0x1F, 0x35, 0x64)
# ALWAYS add text as textboxes -- NEVER use placeholders or slide.shapes.title
txBox = slide.shapes.add_textbox(Inches(1), Inches(2.5), Inches(11), Inches(2))
tf = txBox.text_frame
tf.word_wrap = True
p = tf.paragraphs[0]
p.text = "Title Here"
p.font.size = Pt(36)
p.font.bold = True
p.font.color.rgb = RGBColor(0xFF, 0xFF, 0xFF)
prs.save("/tmp/output_doc.pptx")
## python-docx Rules:
from docx import Document
from docx.shared import Inches, Pt
from docx.enum.text import WD_ALIGN_PARAGRAPH
doc = Document()
# Page margins -- ONLY use sections for margins, nothing else
doc.sections[0].left_margin = Inches(1)
doc.sections[0].right_margin = Inches(1)
# Add content DIRECTLY on doc -- NEVER call add_paragraph/add_heading on sections
doc.add_heading('Main Title', level=0)
doc.add_heading('Section One', level=1)
doc.add_paragraph('Paragraph text here.')
doc.add_heading('Section Two', level=1)
doc.add_paragraph('More paragraph text.')
# Tables -- add directly on doc
table = doc.add_table(rows=1, cols=3)
table.style = 'Table Grid'
hdr = table.rows[0].cells
hdr[0].text = 'Task'
hdr[1].text = 'Owner'
hdr[2].text = 'Status'
row = table.add_row().cells
row[0].text = 'Sample Task'
row[1].text = 'John'
row[2].text = 'Done'
# ALWAYS save last:
doc.save("/tmp/output_doc.docx")"""
def cleanup_output_files():
    """Delete leftover /tmp/output_doc.{pdf,xlsx,pptx,docx} files.

    Called before each generation attempt so that a document left behind by
    an earlier run cannot be mistaken for the new one. Missing files are
    not an error.
    """
    for ext in (".pdf", ".xlsx", ".pptx", ".docx"):
        stale = "/tmp/output_doc" + ext
        try:
            os.remove(stale)
        except FileNotFoundError:
            # Never created, or removed by a concurrent request between our
            # calls; either way there is nothing left to clean up.
            pass
def find_output_file(hint, generated_after):
    """Return the path of the freshly generated document, or None.

    Args:
        hint: Path announced by the model on its "FILE:" line; may be None
            or empty. It is checked before the default locations.
        generated_after: Epoch timestamp; files last modified before it are
            treated as stale and ignored.

    Returns:
        The first candidate that is a regular file of at least 500 bytes
        modified at or after ``generated_after``; None if there is none.
    """
    candidates = []
    if hint:
        candidates.append(hint)
    candidates.extend(
        "/tmp/output_doc" + ext for ext in (".pdf", ".xlsx", ".pptx", ".docx")
    )
    for path in candidates:
        try:
            # Only regular files qualify: a hint such as "/tmp" names a
            # directory, which must never be offered as the document.
            if not os.path.isfile(path):
                continue
            info = os.stat(path)
        except OSError:
            # The file vanished or is unreadable; try the next candidate.
            continue
        # Anything under 500 bytes is treated as an empty/failed write.
        if info.st_size < 500 or info.st_mtime < generated_after:
            continue
        return path
    return None
def _message_text(content):
    """Return chat message content as plain text.

    Plain strings pass through. Lists of content blocks (strings, or dicts
    carrying a string under "text") are joined with newlines.
    NOTE(review): some Gradio versions appear to hand Chatbot history back
    in that block form -- confirm against the installed version.
    """
    if isinstance(content, str):
        return content
    if isinstance(content, (list, tuple)):
        parts = []
        for block in content:
            if isinstance(block, str):
                parts.append(block)
            elif isinstance(block, dict) and isinstance(block.get("text"), str):
                parts.append(block["text"])
        return "\n".join(parts)
    return "" if content is None else str(content)


def _with_exchange(history, user_request, reply):
    """Return a new history list with the request and the reply appended."""
    return history + [
        {"role": "user", "content": user_request},
        {"role": "assistant", "content": reply},
    ]


def _extract_file_path(raw):
    """Split the model output into (announced file path or None, code)."""
    path = None
    kept = []
    for line in raw.strip().split("\n"):
        stripped = line.strip()
        # Tolerate indentation before the marker; otherwise the line would
        # stay in the code and break compilation.
        if stripped.startswith("FILE:"):
            path = stripped[len("FILE:"):].strip().strip("`'\"")
        else:
            kept.append(line)
    return path, "\n".join(kept)


def _clean_code(raw):
    """Strip markdown fences and non-ASCII characters, then syntax-check.

    Raises:
        SyntaxError: if the cleaned text does not compile.
    """
    raw = re.sub(r"^```(?:python)?\n?", "", raw.strip())
    raw = re.sub(r"\n?```$", "", raw.strip())
    raw = raw.encode("ascii", errors="ignore").decode("ascii")
    raw = raw.strip()
    try:
        compile(raw, "<string>", "exec")
    except SyntaxError as se:
        raise SyntaxError("Syntax error (likely truncated): " + str(se)) from se
    return raw


def _run_code(code):
    """Execute generated code with the helper modules importable.

    SECURITY: this runs model-generated code with full builtins inside the
    server process. It is not sandboxed; prompt content can influence what
    gets executed.
    """
    if "/tmp" not in sys.path:
        sys.path.insert(0, "/tmp")
    for mod_name in ["SafeBarChart", "CoverPage"]:
        if mod_name in sys.modules:
            importlib.reload(sys.modules[mod_name])
    exec_globals = {
        "__builtins__": __builtins__,
        # Without this, __name__ resolves to "builtins" and a script guarded
        # by `if __name__ == "__main__":` silently does nothing.
        "__name__": "__main__",
        "sys": sys,
        "safe_float": lambda x: float(str(x).replace(",", "").replace("$", "").strip())
    }
    exec(code, exec_globals)


def _retry_instruction(error_str):
    """Map an error message to a targeted correction prompt for the retry."""
    if "VerticalBarChart" in error_str or "Illegal attribute" in error_str:
        return "Use SafeBarChart: from SafeBarChart import SafeBarChart. Never use VerticalBarChart."
    if "invalid syntax" in error_str or "SyntaxError" in error_str or "was never closed" in error_str or "truncated" in error_str:
        return "Syntax error. Regenerate simpler: max 15 lines per function, string concatenation not f-strings, close ALL brackets, under 80 lines. No emoji or unicode."
    if "too large on page" in error_str:
        return "For cover pages use draw_cover as onFirstPage callback in build_doc(). Start elements with PageBreak(). Never add draw_cover to elements list."
    if "canvas" in error_str and "not defined" in error_str:
        return "Use draw_cover from CoverPage as onFirstPage callback. Never call canvas methods directly in Platypus."
    if "placeholder" in error_str or "idx" in error_str:
        return "Never use slide.placeholders or slide.shapes.title. Use prs.slide_layouts[6] and add_textbox() for all text."
    if "cannot import name 'RGBColor'" in error_str:
        return "Wrong import. RGBColor must come from pptx.dml.color not pptx.util. Use: from pptx.dml.color import RGBColor"
    if "Section" in error_str and "add_paragraph" in error_str:
        return "Never call add_paragraph or add_heading on doc.sections. Only use doc.sections[0] for margins. Add all content directly on doc: doc.add_heading(), doc.add_paragraph(), doc.add_table()."
    if "not supported between instances" in error_str or "has no attribute 'column_letter'" in error_str:
        return "Excel column width error. Use safe loop: for col in ws.columns, col_letter = col[0].column_letter, loop cells with if cell.value is not None check, track max with if/else not max(), set ws.column_dimensions[col_letter].width = max_len + 4."
    if "has no attribute 'canvas'" in error_str or "has no attribute 'canv'" in error_str:
        return "Use Canvas-only API: pdfcanvas.Canvas(), c.drawString(), c.rect(), c.showPage(), c.save(). No Platypus."
    return "Code crashed with: " + error_str[:300] + ". Regenerate clean simple version under 80 lines. No emoji."


def _attempt(messages, temperature):
    """Run one generate-and-execute round; return (output file or None, code)."""
    cleanup_output_files()
    started = time.time()
    response = client.chat_completion(
        messages=messages,
        max_tokens=4096,
        temperature=temperature
    )
    raw_code = response.choices[0].message.content.strip()
    file_path, raw_code = _extract_file_path(raw_code)
    code = _clean_code(raw_code)
    _run_code(code)
    return find_output_file(file_path, generated_after=started), code


def generate_document(user_request, history):
    """Generate a document from a natural-language request.

    The model is asked for Python code, the code is executed, and the file
    it writes is located. If anything in that round raises, one retry is
    made with a correction prompt chosen from the error message.

    Args:
        user_request: The user's description of the wanted document.
        history: Prior chat messages as dicts with "role" and "content".

    Returns:
        A tuple ``(history, file_path, code)``: the history extended with
        this exchange, the generated file's path (or None), and the code
        that was run (or a short notice / empty string when none ran).
    """
    if history is None:
        history = []
    if not user_request or not user_request.strip():
        return history, None, "Please describe the document you want."
    if not HF_TOKEN:
        return _with_exchange(history, user_request, "HF_TOKEN is not set."), None, ""

    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    for h in history:
        text = _message_text(h["content"])
        if h["role"] == "assistant":
            # Earlier assistant replies may contain emoji; keep the context
            # sent to the model ASCII-only.
            text = text.encode("ascii", errors="ignore").decode("ascii")
        messages.append({"role": h["role"], "content": text})
    messages.append({"role": "user", "content": user_request})

    try:
        output_file, code = _attempt(messages, temperature=0.2)
    except Exception as e:
        messages.append({"role": "user", "content": _retry_instruction(str(e))})
        try:
            output_file, code = _attempt(messages, temperature=0.1)
        except Exception as retry_e:
            error_msg = "Error (after auto-retry): " + str(retry_e) + "\n\n" + traceback.format_exc()
            return _with_exchange(history, user_request, error_msg), None, ""
        if output_file:
            reply = "Document ready (auto-fixed)! Download it on the right."
        else:
            reply = "Auto-retry ran but no file found. Check code panel."
        return _with_exchange(history, user_request, reply), output_file, code

    if output_file:
        reply = "Document ready! Download it on the right."
    else:
        reply = "Code ran but no file was created. See code panel."
    return _with_exchange(history, user_request, reply), output_file, code
# ── User interface
# Two-column layout: conversation and request box on the left; download
# slot and the generated code on the right.
with gr.Blocks(title="Document Generator") as demo:
    gr.Markdown(
        "# Document Generation Specialist\n"
        "> Generate professional **PDFs, Excel sheets, PowerPoint decks, and Word docs** -- just describe what you want."
    )
    with gr.Row():
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(
                label="Conversation",
                height=450,
                show_label=True,
            )
            with gr.Row():
                user_input = gr.Textbox(
                    placeholder="E.g. 'Create a Q2 sales report PDF with a revenue bar chart and summary table'",
                    label="What document do you need?",
                    lines=3,
                    scale=4
                )
                submit_btn = gr.Button(
                    "Generate",
                    variant="primary",
                    scale=1,
                    elem_classes=["generate-btn"]
                )
        with gr.Column(scale=1):
            gr.Markdown("### Download Your Document")
            file_output = gr.File(label="Generated Document")
            gr.Markdown("### Generated Code")
            code_output = gr.Code(
                language="python",
                label="Code used to create your document",
                lines=18,
                interactive=False
            )
    # Clicking an example fills the request textbox.
    gr.Examples(
        examples=[
            "Create a simple PDF with title Hello DataFlow in large bold dark blue font centered on the page",
            "Make an Excel file with columns Name, Department, Salary, Start Date and 6 rows. Bold dark blue headers. Freeze top row. Auto-width columns.",
            "Create a Word document titled Project Summary with headings Overview and Next Steps, 2 paragraphs each, and a 3-column table Task/Owner/Status with 4 rows.",
            "Generate a 3-slide PowerPoint for DataFlow Inc. Use slide_layouts[6] and add_textbox() only. Slide 1: dark blue background white title. Slide 2: agenda 4 bullets. Slide 3: thank you.",
            "Create a PDF invoice for DataFlow Inc. Client: Acme Corp. 3 line items totaling $4,300. Include subtotal, 8% tax, and grand total.",
            "Build an Excel expense tracker with Month, Category, Description, Amount columns and 8 rows. Add totals row.",
            "Create a 3-page PDF for DataFlow Inc. Page 1: cover via draw_cover callback. Page 2: executive summary + revenue table Jan-Jun. Page 3: SafeBarChart. Use build_doc() with draw_cover as onFirstPage. Start elements with PageBreak().",
        ],
        inputs=user_input,
        label="Try These Examples"
    )
    # Conversation history handed to generate_document on every request.
    history_state = gr.State([])

    def run(msg, hist):
        # Thin wrapper so both triggers below share one handler.
        return generate_document(msg, hist)

    # Button click and textbox submit are wired identically: run the
    # generator, then clear the textbox.
    submit_btn.click(
        fn=run,
        inputs=[user_input, history_state],
        outputs=[chatbot, file_output, code_output],
    ).then(lambda: "", outputs=user_input)
    user_input.submit(
        fn=run,
        inputs=[user_input, history_state],
        outputs=[chatbot, file_output, code_output],
    ).then(lambda: "", outputs=user_input)
    # Mirror whatever the chatbot shows back into history_state so the next
    # request sees the updated conversation.
    chatbot.change(fn=lambda x: x, inputs=chatbot, outputs=history_state)

# The CSS hides the page footer and gives the Generate button a minimum
# height of 80px.
# NOTE(review): theme/css are passed to launch() here; whether launch()
# accepts them depends on the installed Gradio version -- confirm.
demo.launch(
    theme=gr.themes.Soft(primary_hue="blue"),
    css="footer { display: none !important; } .generate-btn { min-height: 80px !important; }"
)