"""Gradio app that turns a plain-language request into a generated document.

Flow: the user's request is sent to a hosted chat model together with
``SYSTEM_PROMPT``; the model answers with Python source that writes
``/tmp/output_doc.<ext>``; that source is executed in-process and the
resulting file is offered for download. One automatic retry is made with a
targeted hint when the first attempt raises.

Two small helper modules (``SafeBarChart`` and ``CoverPage``) are written to
``/tmp`` at start-up so that the generated code can import them.
"""

import importlib
import os
import re
import sys
import time
import traceback

import gradio as gr
from huggingface_hub import InferenceClient

# ── Token loading
HF_TOKEN = os.environ.get("HF_TOKEN")
if not HF_TOKEN:
    print("⚠️ WARNING: HF_TOKEN environment variable is not set!")
else:
    # NOTE(review): this echoes the first 8 characters of a secret into the
    # logs; kept as-is because it is a deliberate start-up diagnostic.
    print(f"✅ HF_TOKEN loaded — starts with: {HF_TOKEN[:8]}...")

# ── SafeBarChart module
# Source text of /tmp/SafeBarChart.py. It must be valid, correctly indented
# Python because it is written to disk verbatim and imported below.
_safe_bar_code = '''
from reportlab.platypus import Flowable
from reportlab.lib.colors import HexColor, black

class SafeBarChart(Flowable):
    def __init__(self, data, width=480, height=180, color="#4472C4"):
        super().__init__()
        self.chart_data = [(str(l), float(str(v).replace(",","").replace("$",""))) for l, v in data]
        self.width = width
        self.height = height
        self.color = color

    def wrap(self, availWidth, availHeight):
        return (self.width, self.height)

    def draw(self):
        n = len(self.chart_data)
        if n == 0:
            return
        max_val = max(v for _, v in self.chart_data) if max(v for _, v in self.chart_data) > 0 else 1
        chart_area_h = self.height - 40
        total_w = self.width - 20
        bar_w = max(20, (total_w / n) * 0.6)
        gap = max(5, (total_w / n) * 0.4)
        total_used = n * bar_w + (n - 1) * gap
        x_offset = (self.width - total_used) / 2
        for i, (label, val) in enumerate(self.chart_data):
            bar_h = max(4, (val / max_val) * chart_area_h)
            x = x_offset + i * (bar_w + gap)
            y = 25
            self.canv.setFillColor(HexColor(self.color))
            self.canv.rect(x, y, bar_w, bar_h, fill=1, stroke=0)
            self.canv.setFillColor(black)
            self.canv.setFont("Helvetica", 6)
            if val >= 1000000:
                short_val = "$" + str(round(val / 1000000, 1)) + "M"
            elif val >= 1000:
                short_val = "$" + str(int(val / 1000)) + "K"
            else:
                short_val = str(int(val))
            self.canv.drawCentredString(x + bar_w / 2, y + bar_h + 3, short_val)
            self.canv.drawCentredString(x + bar_w / 2, y - 10, label[:3])
'''

# ── CoverPage module
# Source text of /tmp/CoverPage.py (same constraints as above).
_cover_page_code = '''
from reportlab.lib.pagesizes import letter
from reportlab.lib.colors import HexColor, white

def draw_cover(canvas, doc, company="DataFlow Inc", subtitle="Monthly Business Report", date="May 2026"):
    w, h = letter
    canvas.saveState()
    canvas.resetTransforms()
    canvas.setFillColor(white)
    canvas.rect(0, 0, w, h, fill=1, stroke=0)
    canvas.setFillColor(HexColor("#1F3564"))
    canvas.rect(0, h - 120, w, 120, fill=1, stroke=0)
    canvas.setFillColor(white)
    canvas.setFont("Helvetica-Bold", 28)
    canvas.drawCentredString(w / 2, h - 75, company)
    canvas.setFillColor(HexColor("#1F3564"))
    canvas.setFont("Helvetica-Bold", 18)
    canvas.drawCentredString(w / 2, h - 200, subtitle)
    canvas.setFillColor(HexColor("#666666"))
    canvas.setFont("Helvetica", 13)
    canvas.drawCentredString(w / 2, h - 240, date)
    canvas.setStrokeColor(HexColor("#4472C4"))
    canvas.setLineWidth(2)
    canvas.line(w * 0.2, h - 270, w * 0.8, h - 270)
    canvas.restoreState()
'''

# Names of the helper modules written to /tmp and imported by generated code.
_HELPER_MODULES = ("SafeBarChart", "CoverPage")

# Extensions the generated code is told to use for /tmp/output_doc.
_OUTPUT_EXTENSIONS = (".pdf", ".xlsx", ".pptx", ".docx")

# ── Write modules and clear cache
with open("/tmp/SafeBarChart.py", "w", encoding="utf-8") as f:
    f.write(_safe_bar_code)
with open("/tmp/CoverPage.py", "w", encoding="utf-8") as f:
    f.write(_cover_page_code)
for mod_name in _HELPER_MODULES:
    if mod_name in sys.modules:
        del sys.modules[mod_name]
if "/tmp" not in sys.path:
    sys.path.insert(0, "/tmp")
# The two source files were created after the interpreter started, so the
# path finders' cached directory listings may not include them yet.
importlib.invalidate_caches()

# ── Verify
from SafeBarChart import SafeBarChart as _SBC
from CoverPage import draw_cover as _DC

assert callable(_DC), "draw_cover not callable"
_t = _SBC(data=[("Jan", 120000)], width=480, height=200)
assert _t.wrap(500, 500) == (480, 200), "SafeBarChart.wrap() broken"
print("✅ SafeBarChart + CoverPage modules registered and verified")

# ── Model client
client = InferenceClient(
    model="Qwen/Qwen2.5-7B-Instruct",
    token=HF_TOKEN,
)

SYSTEM_PROMPT = """You are Document Generator, a specialist in creating professional documents programmatically.
Your personality: Precise, design-aware, format-savvy, detail-oriented.

## Libraries:
- PDF -> reportlab | Excel -> openpyxl | PowerPoint -> python-pptx | Word -> python-docx

## Document Quality Rules:
1. Consistent branding -- dark blue (#1F3564) primary, white text on dark backgrounds
2. Format all currency as $X,XXX -- never raw numbers like 120000
3. Use realistic professional sample data if user provides none

## EXECUTION RULES (mandatory):
1. Generate ONLY clean executable Python code -- no markdown fences, no prose, no comments
2. Keep functions short -- max 20 lines each
3. Use simple string concatenation -- NEVER f-strings
4. ALWAYS save to: /tmp/output_doc.pdf / .xlsx / .pptx / .docx
5. End response with exactly: FILE:/tmp/output_doc.
6. Return ONLY code + FILE: line. Nothing else.
7. Close ALL parentheses, brackets, and quotes properly.
8. NEVER include emoji or special unicode characters in code.

## reportlab PDF -- Platypus (multi-page docs):
from reportlab.lib.pagesizes import letter
from reportlab.lib import colors
from reportlab.lib.colors import HexColor, black
from reportlab.lib.units import inch
from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph, Spacer, PageBreak
from reportlab.lib.styles import getSampleStyleSheet
from SafeBarChart import SafeBarChart
from CoverPage import draw_cover

styles = getSampleStyleSheet()

def build_doc(filename, elements, company="DataFlow Inc", subtitle="Monthly Business Report", date="May 2026"):
    doc = SimpleDocTemplate(filename, pagesize=letter,
        leftMargin=inch, rightMargin=inch,
        topMargin=inch, bottomMargin=0.75*inch)
    def first_page(canvas, doc):
        draw_cover(canvas, doc, company=company, subtitle=subtitle, date=date)
    def later_pages(canvas, doc):
        canvas.saveState()
        canvas.setFont("Helvetica", 8)
        canvas.drawRightString(letter[0] - inch, 0.5*inch, "Page " + str(doc.page))
        canvas.restoreState()
    doc.build(elements, onFirstPage=first_page, onLaterPages=later_pages)

# Start elements with PageBreak() so content begins on page 2
elements = []
elements.append(PageBreak())
elements.append(Paragraph("Content here", styles["Normal"]))

## Bar Charts -- SafeBarChart only:
from SafeBarChart import SafeBarChart
chart_data = [('Jan', 120000), ('Feb', 150000)]
elements.append(SafeBarChart(data=chart_data, width=480, height=200))
# NEVER use VerticalBarChart or redefine SafeBarChart

## reportlab Table Styling:
table_style = TableStyle([
    ('BACKGROUND', (0,0), (-1,0), HexColor('#1F3564')),
    ('TEXTCOLOR', (0,0), (-1,0), colors.white),
    ('FONTNAME', (0,0), (-1,0), 'Helvetica-Bold'),
    ('FONTSIZE', (0,0), (-1,0), 11),
    ('ALIGN', (0,0), (-1,-1), 'CENTER'),
    ('ROWBACKGROUNDS', (0,1), (-1,-1), [HexColor('#EBF0FA'), colors.white]),
    ('GRID', (0,0), (-1,-1), 0.5, HexColor('#CCCCCC')),
    ('FONTNAME', (0,1), (-1,-1), 'Helvetica'),
    ('FONTSIZE', (0,1), (-1,-1), 10),
    ('TOPPADDING', (0,0), (-1,-1), 6),
    ('BOTTOMPADDING', (0,0), (-1,-1), 6),
])
t = Table(data, colWidths=[2.5*inch, 2.5*inch])
t.setStyle(table_style)

## Canvas-only (simple single-page ONLY):
from reportlab.pdfgen import canvas as pdfcanvas
c = pdfcanvas.Canvas("/tmp/output_doc.pdf", pagesize=letter)
c.drawString(100, 700, "Hello")
c.showPage()
c.save()

## openpyxl Rules:
from openpyxl import Workbook
from openpyxl.styles import Font, PatternFill, Alignment
wb = Workbook()
ws = wb.active
# Write headers first, then data rows
headers = ['Name', 'Department', 'Salary']
ws.append(headers)
# Style header row
for cell in ws[1]:
    cell.fill = PatternFill("solid", fgColor="1F3564")
    cell.font = Font(bold=True, color="FFFFFF")
    cell.alignment = Alignment(horizontal="center")
# Freeze top row
ws.freeze_panes = 'A2'
# Safe auto-width -- ALWAYS use this exact loop, never use max() on column:
for col in ws.columns:
    max_len = 0
    col_letter = col[0].column_letter
    for cell in col:
        try:
            cell_len = len(str(cell.value)) if cell.value is not None else 0
            if cell_len > max_len:
                max_len = cell_len
        except:
            pass
    ws.column_dimensions[col_letter].width = max_len + 4
# ALWAYS save last:
wb.save("/tmp/output_doc.xlsx")

## python-pptx Rules:
from pptx import Presentation
from pptx.util import Inches, Pt
from pptx.dml.color import RGBColor
# RGBColor is from pptx.dml.color NOT pptx.util
prs = Presentation()
prs.slide_width = Inches(13.33)
prs.slide_height = Inches(7.5)
# ALWAYS use blank layout index 6 -- NEVER layouts[0] through [5]
slide = prs.slides.add_slide(prs.slide_layouts[6])
# Set background
fill = slide.background.fill
fill.solid()
fill.fore_color.rgb = RGBColor(0x1F, 0x35, 0x64)
# ALWAYS add text as textboxes -- NEVER use placeholders or slide.shapes.title
txBox = slide.shapes.add_textbox(Inches(1), Inches(2.5), Inches(11), Inches(2))
tf = txBox.text_frame
tf.word_wrap = True
p = tf.paragraphs[0]
p.text = "Title Here"
p.font.size = Pt(36)
p.font.bold = True
p.font.color.rgb = RGBColor(0xFF, 0xFF, 0xFF)
prs.save("/tmp/output_doc.pptx")

## python-docx Rules:
from docx import Document
from docx.shared import Inches, Pt
from docx.enum.text import WD_ALIGN_PARAGRAPH
doc = Document()
# Page margins -- ONLY use sections for margins, nothing else
doc.sections[0].left_margin = Inches(1)
doc.sections[0].right_margin = Inches(1)
# Add content DIRECTLY on doc -- NEVER call add_paragraph/add_heading on sections
doc.add_heading('Main Title', level=0)
doc.add_heading('Section One', level=1)
doc.add_paragraph('Paragraph text here.')
doc.add_heading('Section Two', level=1)
doc.add_paragraph('More paragraph text.')
# Tables -- add directly on doc
table = doc.add_table(rows=1, cols=3)
table.style = 'Table Grid'
hdr = table.rows[0].cells
hdr[0].text = 'Task'
hdr[1].text = 'Owner'
hdr[2].text = 'Status'
row = table.add_row().cells
row[0].text = 'Sample Task'
row[1].text = 'John'
row[2].text = 'Done'
# ALWAYS save last:
doc.save("/tmp/output_doc.docx")"""


def cleanup_output_files():
    """Delete any ``/tmp/output_doc.<ext>`` left over from a previous run."""
    for ext in _OUTPUT_EXTENSIONS:
        stale = "/tmp/output_doc" + ext
        # Try-and-ignore rather than exists()+remove(): the file may be
        # removed by someone else between the two calls.
        try:
            os.remove(stale)
        except FileNotFoundError:
            pass


def find_output_file(hint, generated_after):
    """Return the path of the document produced by the last run, or ``None``.

    Args:
        hint: Path taken from the model's ``FILE:`` line; may be ``None`` or
            empty. It is checked before the default ``/tmp/output_doc.*``
            candidates.
        generated_after: ``time.time()`` value taken before the run; files
            whose modification time is older are ignored.

    A candidate is accepted only if it exists, is at least 500 bytes and was
    modified at or after ``generated_after``.
    """
    candidates = []
    if hint and hint.strip():
        candidates.append(hint.strip())
    for ext in _OUTPUT_EXTENSIONS:
        candidates.append("/tmp/output_doc" + ext)

    for path in candidates:
        try:
            if not os.path.exists(path):
                continue
            if os.path.getsize(path) < 500:
                continue
            if os.path.getmtime(path) < generated_after:
                continue
        except OSError:
            # The file disappeared (or became unreadable) between checks.
            continue
        return path
    return None


_FENCE_OPEN_RE = re.compile(r"^```(?:python)?\n?")
_FENCE_CLOSE_RE = re.compile(r"\n?```$")


def _content_to_text(content):
    """Flatten a chat message ``content`` value to a plain string.

    Plain strings pass through unchanged. Lists are joined from their string
    items and from the ``"text"`` field of dict items.
    NOTE(review): list-shaped content is presumably what newer Gradio Chatbot
    versions hand back -- confirm against the installed Gradio version.
    """
    if isinstance(content, str):
        return content
    if isinstance(content, (list, tuple)):
        parts = []
        for item in content:
            if isinstance(item, str):
                parts.append(item)
            elif isinstance(item, dict) and isinstance(item.get("text"), str):
                parts.append(item["text"])
        return "\n".join(parts)
    return "" if content is None else str(content)


def _build_messages(history, user_request):
    """Build the chat-completion message list: system prompt, history, request."""
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    for turn in history:
        content = _content_to_text(turn["content"])
        if turn["role"] == "assistant":
            # Assistant turns are reduced to ASCII before being replayed.
            content = content.encode("ascii", errors="ignore").decode("ascii")
        messages.append({"role": turn["role"], "content": content})
    messages.append({"role": "user", "content": user_request})
    return messages


def _extract_file_path(raw):
    """Split the model output into (path from the ``FILE:`` line, remaining code).

    Lines are matched after stripping surrounding whitespace so an indented or
    CR-terminated ``FILE:`` line is still recognised. If several ``FILE:``
    lines are present the last one wins. ``path`` is ``None`` when there is
    no such line.
    """
    path = None
    code_lines = []
    for line in raw.strip().split("\n"):
        stripped = line.strip()
        if stripped.startswith("FILE:"):
            path = stripped[len("FILE:"):].strip()
        else:
            code_lines.append(line)
    return path, "\n".join(code_lines)


def _clean_code(raw):
    """Strip markdown fences and non-ASCII characters, then syntax-check.

    Raises:
        SyntaxError: if the cleaned text does not compile. The message starts
            with "Syntax error (likely truncated)", which
            ``_retry_instruction_for`` keys on.
    """
    raw = _FENCE_OPEN_RE.sub("", raw.strip())
    raw = _FENCE_CLOSE_RE.sub("", raw.strip())
    raw = raw.encode("ascii", errors="ignore").decode("ascii")
    raw = raw.strip()
    try:
        compile(raw, "<generated>", "exec")
    except SyntaxError as se:
        raise SyntaxError("Syntax error (likely truncated): " + str(se)) from se
    return raw


def _run_code(code):
    """Execute model-generated source in a fresh globals dict.

    SECURITY: this runs model-written code with full interpreter privileges.
    It is the core of the app and is left in place, but it must only be
    deployed where that is acceptable (e.g. an isolated container).
    """
    if "/tmp" not in sys.path:
        sys.path.insert(0, "/tmp")
    for mod_name in _HELPER_MODULES:
        if mod_name in sys.modules:
            importlib.reload(sys.modules[mod_name])
    exec_globals = {
        "__builtins__": __builtins__,
        # Without this, ``__name__`` resolves through builtins to "builtins",
        # so a generated ``if __name__ == "__main__":`` guard would silently
        # skip the code that builds the document.
        "__name__": "__main__",
        "sys": sys,
        "safe_float": lambda x: float(str(x).replace(",", "").replace("$", "").strip()),
    }
    exec(code, exec_globals)


def _retry_instruction_for(error_str):
    """Map an error message from the first attempt to a corrective hint.

    The checks are ordered; the first matching rule wins.
    """
    if "VerticalBarChart" in error_str or "Illegal attribute" in error_str:
        return "Use SafeBarChart: from SafeBarChart import SafeBarChart. Never use VerticalBarChart."
    if (
        "invalid syntax" in error_str
        or "SyntaxError" in error_str
        or "was never closed" in error_str
        or "truncated" in error_str
    ):
        return "Syntax error. Regenerate simpler: max 15 lines per function, string concatenation not f-strings, close ALL brackets, under 80 lines. No emoji or unicode."
    if "too large on page" in error_str:
        return "For cover pages use draw_cover as onFirstPage callback in build_doc(). Start elements with PageBreak(). Never add draw_cover to elements list."
    if "canvas" in error_str and "not defined" in error_str:
        return "Use draw_cover from CoverPage as onFirstPage callback. Never call canvas methods directly in Platypus."
    if "placeholder" in error_str or "idx" in error_str:
        return "Never use slide.placeholders or slide.shapes.title. Use prs.slide_layouts[6] and add_textbox() for all text."
    if "cannot import name 'RGBColor'" in error_str:
        return "Wrong import. RGBColor must come from pptx.dml.color not pptx.util. Use: from pptx.dml.color import RGBColor"
    if "Section" in error_str and "add_paragraph" in error_str:
        return "Never call add_paragraph or add_heading on doc.sections. Only use doc.sections[0] for margins. Add all content directly on doc: doc.add_heading(), doc.add_paragraph(), doc.add_table()."
    if "not supported between instances" in error_str or "has no attribute 'column_letter'" in error_str:
        return "Excel column width error. Use safe loop: for col in ws.columns, col_letter = col[0].column_letter, loop cells with if cell.value is not None check, track max with if/else not max(), set ws.column_dimensions[col_letter].width = max_len + 4."
    if "has no attribute 'canvas'" in error_str or "has no attribute 'canv'" in error_str:
        return "Use Canvas-only API: pdfcanvas.Canvas(), c.drawString(), c.rect(), c.showPage(), c.save(). No Platypus."
    return "Code crashed with: " + error_str[:300] + ". Regenerate clean simple version under 80 lines. No emoji."


def _attempt(messages, temperature, started_at):
    """Ask the model for code, run it, and look for the produced file.

    Returns:
        ``(output_file, code)`` where ``output_file`` is ``None`` when the
        code ran but no fresh document was found.

    Any exception from the model call, the syntax check or the executed code
    propagates to the caller.
    """
    response = client.chat_completion(
        messages=messages,
        max_tokens=4096,
        temperature=temperature,
    )
    raw = response.choices[0].message.content.strip()
    file_path, raw_code = _extract_file_path(raw)
    code = _clean_code(raw_code)
    _run_code(code)
    return find_output_file(file_path, generated_after=started_at), code


def _with_reply(history, user_request, reply):
    """Return a new history list with the user turn and assistant reply appended."""
    return history + [
        {"role": "user", "content": user_request},
        {"role": "assistant", "content": reply},
    ]


def _retry_once(first_error, messages, history, user_request):
    """Run the single auto-retry; must be called while handling ``first_error``."""
    retry_instruction = _retry_instruction_for(str(first_error))
    cleanup_output_files()
    retry_start = time.time()
    try:
        messages.append({"role": "user", "content": retry_instruction})
        output_file, code = _attempt(messages, temperature=0.1, started_at=retry_start)
    except Exception as retry_e:
        # Last line of defence for the UI: report instead of crashing the app.
        error_msg = "Error (after auto-retry): " + str(retry_e) + "\n\n" + traceback.format_exc()
        return _with_reply(history, user_request, error_msg), None, ""
    if output_file:
        reply = "Document ready (auto-fixed)! Download it on the right."
    else:
        reply = "Auto-retry ran but no file found. Check code panel."
    return _with_reply(history, user_request, reply), output_file, code


def generate_document(user_request, history):
    """Generate a document for ``user_request`` and report the outcome.

    Args:
        user_request: Free-text description of the wanted document.
        history: Prior chat turns as a list of ``{"role", "content"}`` dicts
            (``None`` is treated as empty).

    Returns:
        A ``(history, file_path, code)`` triple: the updated chat history, the
        path of the generated file (``None`` on failure) and the code that was
        run (or a short notice / empty string when nothing ran).
    """
    history = list(history or [])
    if not user_request or not user_request.strip():
        return history, None, "Please describe the document you want."
    if not HF_TOKEN:
        return _with_reply(history, user_request, "HF_TOKEN is not set."), None, ""

    messages = _build_messages(history, user_request)

    cleanup_output_files()
    run_start = time.time()
    try:
        output_file, code = _attempt(messages, temperature=0.2, started_at=run_start)
    except Exception as e:
        # Any failure (API, syntax, runtime) gets exactly one guided retry.
        return _retry_once(e, messages, history, user_request)

    if output_file:
        reply = "Document ready! Download it on the right."
    else:
        reply = "Code ran but no file was created. See code panel."
    return _with_reply(history, user_request, reply), output_file, code


with gr.Blocks(title="Document Generator") as demo:
    gr.Markdown(
        "# Document Generation Specialist\n"
        "> Generate professional **PDFs, Excel sheets, PowerPoint decks, and Word docs** -- just describe what you want."
    )
    with gr.Row():
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(
                label="Conversation",
                height=450,
                show_label=True,
            )
            with gr.Row():
                user_input = gr.Textbox(
                    placeholder="E.g. 'Create a Q2 sales report PDF with a revenue bar chart and summary table'",
                    label="What document do you need?",
                    lines=3,
                    scale=4
                )
                submit_btn = gr.Button(
                    "Generate",
                    variant="primary",
                    scale=1,
                    elem_classes=["generate-btn"]
                )
        with gr.Column(scale=1):
            gr.Markdown("### Download Your Document")
            file_output = gr.File(label="Generated Document")
            gr.Markdown("### Generated Code")
            code_output = gr.Code(
                language="python",
                label="Code used to create your document",
                lines=18,
                interactive=False
            )

    gr.Examples(
        examples=[
            "Create a simple PDF with title Hello DataFlow in large bold dark blue font centered on the page",
            "Make an Excel file with columns Name, Department, Salary, Start Date and 6 rows. Bold dark blue headers. Freeze top row. Auto-width columns.",
            "Create a Word document titled Project Summary with headings Overview and Next Steps, 2 paragraphs each, and a 3-column table Task/Owner/Status with 4 rows.",
            "Generate a 3-slide PowerPoint for DataFlow Inc. Use slide_layouts[6] and add_textbox() only. Slide 1: dark blue background white title. Slide 2: agenda 4 bullets. Slide 3: thank you.",
            "Create a PDF invoice for DataFlow Inc. Client: Acme Corp. 3 line items totaling $4,300. Include subtotal, 8% tax, and grand total.",
            "Build an Excel expense tracker with Month, Category, Description, Amount columns and 8 rows. Add totals row.",
            "Create a 3-page PDF for DataFlow Inc. Page 1: cover via draw_cover callback. Page 2: executive summary + revenue table Jan-Jun. Page 3: SafeBarChart. Use build_doc() with draw_cover as onFirstPage. Start elements with PageBreak().",
        ],
        inputs=user_input,
        label="Try These Examples"
    )

    history_state = gr.State([])

    def run(msg, hist):
        """Event handler: forward the textbox value and stored history."""
        return generate_document(msg, hist)

    submit_btn.click(
        fn=run,
        inputs=[user_input, history_state],
        outputs=[chatbot, file_output, code_output],
    ).then(lambda: "", outputs=user_input)

    user_input.submit(
        fn=run,
        inputs=[user_input, history_state],
        outputs=[chatbot, file_output, code_output],
    ).then(lambda: "", outputs=user_input)

    # Mirror whatever the chatbot currently shows into the state that is fed
    # back to generate_document on the next request.
    chatbot.change(fn=lambda x: x, inputs=chatbot, outputs=history_state)

demo.launch(
    theme=gr.themes.Soft(primary_hue="blue"),
    css="footer { display: none !important; } .generate-btn { min-height: 80px !important; }"
)