Manojsha9530's picture
Update app.py
344ec7f verified
Raw
History Blame Contribute Delete
22 kB
import gradio as gr
import os
import sys
import time
import importlib
import traceback
import re
from huggingface_hub import InferenceClient
# ── Token loading
# The token is read once at import time. Only its presence is reported:
# echoing even a prefix of the secret would copy part of it into the logs.
HF_TOKEN = os.environ.get("HF_TOKEN")
if not HF_TOKEN:
    print("⚠️ WARNING: HF_TOKEN environment variable is not set!")
else:
    print("✅ HF_TOKEN loaded")
# ── SafeBarChart module
# Source text of the helper module that is written to /tmp/SafeBarChart.py so
# that generated scripts can run "from SafeBarChart import SafeBarChart".
# The text must be valid, indented Python because it is imported as a module.
_safe_bar_code = '''
from reportlab.platypus import Flowable
from reportlab.lib.colors import HexColor, black


class SafeBarChart(Flowable):
    """Simple vertical bar chart drawn with plain canvas calls.

    data is an iterable of (label, value) pairs; values may be numbers or
    strings such as "$1,200".
    """

    def __init__(self, data, width=480, height=180, color="#4472C4"):
        super().__init__()
        # Strip thousands separators and dollar signs before converting.
        self.chart_data = [(str(l), float(str(v).replace(",","").replace("$",""))) for l, v in data]
        self.width = width
        self.height = height
        self.color = color

    def wrap(self, availWidth, availHeight):
        # The chart always claims its configured size.
        return (self.width, self.height)

    def draw(self):
        n = len(self.chart_data)
        if n == 0:
            return
        # A non-positive maximum would break the height scaling below.
        peak = max(v for _, v in self.chart_data)
        max_val = peak if peak > 0 else 1
        chart_area_h = self.height - 40
        total_w = self.width - 20
        bar_w = max(20, (total_w / n) * 0.6)
        gap = max(5, (total_w / n) * 0.4)
        total_used = n * bar_w + (n - 1) * gap
        # The minimum bar and gap sizes can add up to more than the drawable
        # width when there are many bars; shrink both so nothing is drawn
        # outside the flowable.
        if total_used > total_w > 0:
            shrink = total_w / total_used
            bar_w = bar_w * shrink
            gap = gap * shrink
            total_used = total_w
        x_offset = (self.width - total_used) / 2
        for i, (label, val) in enumerate(self.chart_data):
            bar_h = max(4, (val / max_val) * chart_area_h)
            x = x_offset + i * (bar_w + gap)
            y = 25
            self.canv.setFillColor(HexColor(self.color))
            self.canv.rect(x, y, bar_w, bar_h, fill=1, stroke=0)
            self.canv.setFillColor(black)
            self.canv.setFont("Helvetica", 6)
            if val >= 1000000:
                short_val = "$" + str(round(val / 1000000, 1)) + "M"
            elif val >= 1000:
                short_val = "$" + str(int(val / 1000)) + "K"
            else:
                short_val = str(int(val))
            # Value above the bar, first three label characters below it.
            self.canv.drawCentredString(x + bar_w / 2, y + bar_h + 3, short_val)
            self.canv.drawCentredString(x + bar_w / 2, y - 10, label[:3])
'''
# ── CoverPage module
# Source text of the helper module that is written to /tmp/CoverPage.py so
# that generated scripts can run "from CoverPage import draw_cover".
# The text must be valid, indented Python because it is imported as a module.
_cover_page_code = '''
from reportlab.lib.pagesizes import letter
from reportlab.lib.colors import HexColor, white


def draw_cover(canvas, doc, company="DataFlow Inc", subtitle="Monthly Business Report", date="May 2026"):
    """Paint a full-page cover; intended as an onFirstPage callback."""
    # Use the page size of the document being built and fall back to letter
    # when the caller passes no document or one without a pagesize.
    w, h = getattr(doc, "pagesize", None) or letter
    canvas.saveState()
    canvas.resetTransforms()
    # White page background.
    canvas.setFillColor(white)
    canvas.rect(0, 0, w, h, fill=1, stroke=0)
    # Dark blue banner across the top with the company name in white.
    canvas.setFillColor(HexColor("#1F3564"))
    canvas.rect(0, h - 120, w, 120, fill=1, stroke=0)
    canvas.setFillColor(white)
    canvas.setFont("Helvetica-Bold", 28)
    canvas.drawCentredString(w / 2, h - 75, company)
    # Subtitle and date below the banner.
    canvas.setFillColor(HexColor("#1F3564"))
    canvas.setFont("Helvetica-Bold", 18)
    canvas.drawCentredString(w / 2, h - 200, subtitle)
    canvas.setFillColor(HexColor("#666666"))
    canvas.setFont("Helvetica", 13)
    canvas.drawCentredString(w / 2, h - 240, date)
    # Accent rule under the date.
    canvas.setStrokeColor(HexColor("#4472C4"))
    canvas.setLineWidth(2)
    canvas.line(w * 0.2, h - 270, w * 0.8, h - 270)
    canvas.restoreState()
'''
# ── Write modules and clear cache
# Each helper source is written under /tmp so that it can be imported by name.
for _path, _source in (
    ("/tmp/SafeBarChart.py", _safe_bar_code),
    ("/tmp/CoverPage.py", _cover_page_code),
):
    with open(_path, "w") as f:
        f.write(_source)

# Forget any copy imported earlier so the imports below read the new files.
for mod_name in ("SafeBarChart", "CoverPage"):
    sys.modules.pop(mod_name, None)

if "/tmp" not in sys.path:
    sys.path.insert(0, "/tmp")

# ── Verify
# Import both helpers once at startup and check the pieces generated code uses.
from SafeBarChart import SafeBarChart as _SBC
from CoverPage import draw_cover as _DC

assert callable(_DC), "draw_cover not callable"
_t = _SBC(data=[("Jan", 120000)], width=480, height=200)
assert _t.wrap(500, 500) == (480, 200), "SafeBarChart.wrap() broken"
print("✅ SafeBarChart + CoverPage modules registered and verified")
# ── Model client
# One shared inference client; every chat completion request goes through it.
_MODEL_ID = "Qwen/Qwen2.5-7B-Instruct"
client = InferenceClient(model=_MODEL_ID, token=HF_TOKEN)
# System prompt sent as the first message of every chat completion request.
# It names the library to use per document type, the fixed /tmp/output_doc.*
# output paths and the trailing "FILE:" line that the response handling in
# generate_document() looks for, followed by reference snippets for reportlab,
# openpyxl, python-pptx and python-docx.
# NOTE(review): the code snippets inside the prompt carry no indentation in
# this copy of the file -- confirm against the deployed app that this is intended.
SYSTEM_PROMPT = """You are Document Generator, a specialist in creating professional documents programmatically.
Your personality: Precise, design-aware, format-savvy, detail-oriented.
## Libraries:
- PDF -> reportlab | Excel -> openpyxl | PowerPoint -> python-pptx | Word -> python-docx
## Document Quality Rules:
1. Consistent branding -- dark blue (#1F3564) primary, white text on dark backgrounds
2. Format all currency as $X,XXX -- never raw numbers like 120000
3. Use realistic professional sample data if user provides none
## EXECUTION RULES (mandatory):
1. Generate ONLY clean executable Python code -- no markdown fences, no prose, no comments
2. Keep functions short -- max 20 lines each
3. Use simple string concatenation -- NEVER f-strings
4. ALWAYS save to: /tmp/output_doc.pdf / .xlsx / .pptx / .docx
5. End response with exactly: FILE:/tmp/output_doc.<extension>
6. Return ONLY code + FILE: line. Nothing else.
7. Close ALL parentheses, brackets, and quotes properly.
8. NEVER include emoji or special unicode characters in code.
## reportlab PDF -- Platypus (multi-page docs):
from reportlab.lib.pagesizes import letter
from reportlab.lib import colors
from reportlab.lib.colors import HexColor, black
from reportlab.lib.units import inch
from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph, Spacer, PageBreak
from reportlab.lib.styles import getSampleStyleSheet
from SafeBarChart import SafeBarChart
from CoverPage import draw_cover
styles = getSampleStyleSheet()
def build_doc(filename, elements, company="DataFlow Inc", subtitle="Monthly Business Report", date="May 2026"):
doc = SimpleDocTemplate(filename, pagesize=letter,
leftMargin=inch, rightMargin=inch,
topMargin=inch, bottomMargin=0.75*inch)
def first_page(canvas, doc):
draw_cover(canvas, doc, company=company, subtitle=subtitle, date=date)
def later_pages(canvas, doc):
canvas.saveState()
canvas.setFont("Helvetica", 8)
canvas.drawRightString(letter[0] - inch, 0.5*inch, "Page " + str(doc.page))
canvas.restoreState()
doc.build(elements, onFirstPage=first_page, onLaterPages=later_pages)
# Start elements with PageBreak() so content begins on page 2
elements = []
elements.append(PageBreak())
elements.append(Paragraph("Content here", styles["Normal"]))
## Bar Charts -- SafeBarChart only:
from SafeBarChart import SafeBarChart
chart_data = [('Jan', 120000), ('Feb', 150000)]
elements.append(SafeBarChart(data=chart_data, width=480, height=200))
# NEVER use VerticalBarChart or redefine SafeBarChart
## reportlab Table Styling:
table_style = TableStyle([
('BACKGROUND', (0,0), (-1,0), HexColor('#1F3564')),
('TEXTCOLOR', (0,0), (-1,0), colors.white),
('FONTNAME', (0,0), (-1,0), 'Helvetica-Bold'),
('FONTSIZE', (0,0), (-1,0), 11),
('ALIGN', (0,0), (-1,-1), 'CENTER'),
('ROWBACKGROUNDS', (0,1), (-1,-1), [HexColor('#EBF0FA'), colors.white]),
('GRID', (0,0), (-1,-1), 0.5, HexColor('#CCCCCC')),
('FONTNAME', (0,1), (-1,-1), 'Helvetica'),
('FONTSIZE', (0,1), (-1,-1), 10),
('TOPPADDING', (0,0), (-1,-1), 6),
('BOTTOMPADDING', (0,0), (-1,-1), 6),
])
t = Table(data, colWidths=[2.5*inch, 2.5*inch])
t.setStyle(table_style)
## Canvas-only (simple single-page ONLY):
from reportlab.pdfgen import canvas as pdfcanvas
c = pdfcanvas.Canvas("/tmp/output_doc.pdf", pagesize=letter)
c.drawString(100, 700, "Hello")
c.showPage()
c.save()
## openpyxl Rules:
from openpyxl import Workbook
from openpyxl.styles import Font, PatternFill, Alignment
wb = Workbook()
ws = wb.active
# Write headers first, then data rows
headers = ['Name', 'Department', 'Salary']
ws.append(headers)
# Style header row
for cell in ws[1]:
cell.fill = PatternFill("solid", fgColor="1F3564")
cell.font = Font(bold=True, color="FFFFFF")
cell.alignment = Alignment(horizontal="center")
# Freeze top row
ws.freeze_panes = 'A2'
# Safe auto-width -- ALWAYS use this exact loop, never use max() on column:
for col in ws.columns:
max_len = 0
col_letter = col[0].column_letter
for cell in col:
try:
cell_len = len(str(cell.value)) if cell.value is not None else 0
if cell_len > max_len:
max_len = cell_len
except:
pass
ws.column_dimensions[col_letter].width = max_len + 4
# ALWAYS save last:
wb.save("/tmp/output_doc.xlsx")
## python-pptx Rules:
from pptx import Presentation
from pptx.util import Inches, Pt
from pptx.dml.color import RGBColor # RGBColor is from pptx.dml.color NOT pptx.util
prs = Presentation()
prs.slide_width = Inches(13.33)
prs.slide_height = Inches(7.5)
# ALWAYS use blank layout index 6 -- NEVER layouts[0] through [5]
slide = prs.slides.add_slide(prs.slide_layouts[6])
# Set background
fill = slide.background.fill
fill.solid()
fill.fore_color.rgb = RGBColor(0x1F, 0x35, 0x64)
# ALWAYS add text as textboxes -- NEVER use placeholders or slide.shapes.title
txBox = slide.shapes.add_textbox(Inches(1), Inches(2.5), Inches(11), Inches(2))
tf = txBox.text_frame
tf.word_wrap = True
p = tf.paragraphs[0]
p.text = "Title Here"
p.font.size = Pt(36)
p.font.bold = True
p.font.color.rgb = RGBColor(0xFF, 0xFF, 0xFF)
prs.save("/tmp/output_doc.pptx")
## python-docx Rules:
from docx import Document
from docx.shared import Inches, Pt
from docx.enum.text import WD_ALIGN_PARAGRAPH
doc = Document()
# Page margins -- ONLY use sections for margins, nothing else
doc.sections[0].left_margin = Inches(1)
doc.sections[0].right_margin = Inches(1)
# Add content DIRECTLY on doc -- NEVER call add_paragraph/add_heading on sections
doc.add_heading('Main Title', level=0)
doc.add_heading('Section One', level=1)
doc.add_paragraph('Paragraph text here.')
doc.add_heading('Section Two', level=1)
doc.add_paragraph('More paragraph text.')
# Tables -- add directly on doc
table = doc.add_table(rows=1, cols=3)
table.style = 'Table Grid'
hdr = table.rows[0].cells
hdr[0].text = 'Task'
hdr[1].text = 'Owner'
hdr[2].text = 'Status'
row = table.add_row().cells
row[0].text = 'Sample Task'
row[1].text = 'John'
row[2].text = 'Done'
# ALWAYS save last:
doc.save("/tmp/output_doc.docx")"""
def cleanup_output_files():
    """Delete any leftover /tmp/output_doc.* document from an earlier run.

    Removal is attempted directly and a missing file is ignored, so the call
    cannot fail when the file disappears between a check and the removal
    (for example because another request cleaned up at the same time).
    Other errors, such as missing permissions, still propagate.
    """
    for ext in (".pdf", ".xlsx", ".pptx", ".docx"):
        try:
            os.remove("/tmp/output_doc" + ext)
        except FileNotFoundError:
            pass
def find_output_file(hint, generated_after):
    """Return the path of the document produced by the current run, or None.

    Args:
        hint: Path reported by the model on its "FILE:" line; may be None or
            empty. It is checked before the default /tmp/output_doc.* paths.
        generated_after: Timestamp (as returned by time.time()); files last
            modified before it are treated as stale and skipped.

    Returns:
        The first candidate that is a regular file of at least 500 bytes and
        was modified at or after generated_after, otherwise None.
    """
    candidates = []
    if hint:
        candidates.append(hint)
    candidates.extend(
        "/tmp/output_doc" + ext for ext in (".pdf", ".xlsx", ".pptx", ".docx")
    )
    for path in candidates:
        # A hint such as "/tmp" names a directory; only regular files can be
        # offered for download.
        if not os.path.isfile(path):
            continue
        try:
            info = os.stat(path)
        except OSError:
            # The file vanished between the check and the stat call.
            continue
        if info.st_size < 500 or info.st_mtime < generated_after:
            continue
        return path
    return None
def _message_text(content):
    """Return chat message content as plain text.

    Content is normally a string. A list of parts (strings, or dicts with a
    "text" entry) is flattened so later string handling cannot fail.
    NOTE(review): some chat UI versions hand content back as a list of parts;
    confirm which shape the installed Gradio version produces.
    """
    if isinstance(content, str):
        return content
    if isinstance(content, (list, tuple)):
        parts = []
        for part in content:
            if isinstance(part, dict):
                part = part.get("text") or ""
            parts.append(part if isinstance(part, str) else str(part))
        return "\n".join(parts)
    return "" if content is None else str(content)


def _ascii_only(text):
    """Drop every non-ASCII character from text."""
    return text.encode("ascii", errors="ignore").decode("ascii")


def _append_turn(history, user_request, reply):
    """Return a new history list extended by one user/assistant exchange."""
    return history + [
        {"role": "user", "content": user_request},
        {"role": "assistant", "content": reply},
    ]


def _extract_file_path(raw):
    """Split a model reply into (reported file path or None, remaining text).

    Lines starting with "FILE:" (leading whitespace allowed) are removed from
    the text; when several are present the last one wins.
    """
    path = None
    kept = []
    for line in raw.strip().split("\n"):
        stripped = line.strip()
        if stripped.startswith("FILE:"):
            path = stripped[len("FILE:"):].strip()
        else:
            kept.append(line)
    return path, "\n".join(kept)


def _clean_code(raw):
    """Strip markdown fences and non-ASCII characters, then syntax-check.

    Raises:
        SyntaxError: if the cleaned text does not compile; the message keeps
            the "likely truncated" wording used to pick the retry instruction.
    """
    raw = re.sub(r"^```(?:python)?\n?", "", raw.strip())
    raw = re.sub(r"\n?```$", "", raw.strip())
    raw = _ascii_only(raw).strip()
    try:
        compile(raw, "<string>", "exec")
    except SyntaxError as se:
        raise SyntaxError("Syntax error (likely truncated): " + str(se)) from se
    return raw


def _run_generated_code(code):
    """Execute generated code with the helper modules importable.

    SECURITY: this runs model-written code in-process with full builtins and
    no sandbox. NOTE(review): consider isolating it (separate process or
    container) if the app is exposed to untrusted users.
    """
    if "/tmp" not in sys.path:
        sys.path.insert(0, "/tmp")
    # Reload so a generated script that tampered with the helpers in a
    # previous run does not affect this one.
    for mod_name in ["SafeBarChart", "CoverPage"]:
        if mod_name in sys.modules:
            importlib.reload(sys.modules[mod_name])
    exec_globals = {
        "__builtins__": __builtins__,
        "sys": sys,
        "safe_float": lambda x: float(str(x).replace(",", "").replace("$", "").strip())
    }
    exec(code, exec_globals)


def _retry_instruction_for(error_str):
    """Map the text of the first failure to a targeted retry instruction."""
    if "VerticalBarChart" in error_str or "Illegal attribute" in error_str:
        return "Use SafeBarChart: from SafeBarChart import SafeBarChart. Never use VerticalBarChart."
    if "invalid syntax" in error_str or "SyntaxError" in error_str or "was never closed" in error_str or "truncated" in error_str:
        return "Syntax error. Regenerate simpler: max 15 lines per function, string concatenation not f-strings, close ALL brackets, under 80 lines. No emoji or unicode."
    if "too large on page" in error_str:
        return "For cover pages use draw_cover as onFirstPage callback in build_doc(). Start elements with PageBreak(). Never add draw_cover to elements list."
    if "canvas" in error_str and "not defined" in error_str:
        return "Use draw_cover from CoverPage as onFirstPage callback. Never call canvas methods directly in Platypus."
    if "placeholder" in error_str or "idx" in error_str:
        return "Never use slide.placeholders or slide.shapes.title. Use prs.slide_layouts[6] and add_textbox() for all text."
    if "cannot import name 'RGBColor'" in error_str:
        return "Wrong import. RGBColor must come from pptx.dml.color not pptx.util. Use: from pptx.dml.color import RGBColor"
    if "Section" in error_str and "add_paragraph" in error_str:
        return "Never call add_paragraph or add_heading on doc.sections. Only use doc.sections[0] for margins. Add all content directly on doc: doc.add_heading(), doc.add_paragraph(), doc.add_table()."
    if "not supported between instances" in error_str or "has no attribute 'column_letter'" in error_str:
        return "Excel column width error. Use safe loop: for col in ws.columns, col_letter = col[0].column_letter, loop cells with if cell.value is not None check, track max with if/else not max(), set ws.column_dimensions[col_letter].width = max_len + 4."
    if "has no attribute 'canvas'" in error_str or "has no attribute 'canv'" in error_str:
        return "Use Canvas-only API: pdfcanvas.Canvas(), c.drawString(), c.rect(), c.showPage(), c.save(). No Platypus."
    return "Code crashed with: " + error_str[:300] + ". Regenerate clean simple version under 80 lines. No emoji."


def _request_code(messages, temperature):
    """Ask the model for code and return its stripped reply text."""
    response = client.chat_completion(
        messages=messages,
        max_tokens=4096,
        temperature=temperature
    )
    return response.choices[0].message.content.strip()


def _build_document(reply, started_at):
    """Run the code in reply; return (output file path or None, code)."""
    file_hint, body = _extract_file_path(reply)
    code = _clean_code(body)
    _run_generated_code(code)
    return find_output_file(file_hint, generated_after=started_at), code


def generate_document(user_request, history):
    """Generate a document from a natural-language request.

    The model is asked for Python code, the code is executed, and the file it
    writes is looked up. If anything in that first attempt raises, one retry
    is made with an instruction chosen from the error text.

    Args:
        user_request: The user's description of the document.
        history: Earlier chat turns as a list of {"role", "content"} dicts.

    Returns:
        A tuple (updated history, output file path or None, text for the
        code panel). For an empty request the third item is a prompt to
        describe the document; after a failed retry it is an empty string.
    """
    history = history or []
    if not user_request or not user_request.strip():
        return history, None, "Please describe the document you want."
    if not HF_TOKEN:
        return _append_turn(history, user_request, "HF_TOKEN is not set."), None, ""

    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    for turn in history:
        text = _message_text(turn["content"])
        if turn["role"] == "assistant":
            # Earlier assistant messages are sent without non-ASCII characters.
            text = _ascii_only(text)
        messages.append({"role": turn["role"], "content": text})
    messages.append({"role": "user", "content": user_request})

    first_reply = None
    try:
        cleanup_output_files()
        started_at = time.time()
        first_reply = _request_code(messages, temperature=0.2)
        output_file, code = _build_document(first_reply, started_at)
    except Exception as first_error:
        retry_instruction = _retry_instruction_for(str(first_error))
    else:
        if output_file:
            reply = "Document ready! Download it on the right."
        else:
            reply = "Code ran but no file was created. See code panel."
        return _append_turn(history, user_request, reply), output_file, code

    try:
        cleanup_output_files()
        started_at = time.time()
        if first_reply:
            # Show the model the reply that failed so the retry instruction
            # refers to something it can see.
            messages.append({"role": "assistant", "content": _ascii_only(first_reply)})
        messages.append({"role": "user", "content": retry_instruction})
        retry_reply = _request_code(messages, temperature=0.1)
        output_file, code = _build_document(retry_reply, started_at)
    except Exception as retry_e:
        error_msg = "Error (after auto-retry): " + str(retry_e) + "\n\n" + traceback.format_exc()
        return _append_turn(history, user_request, error_msg), None, ""

    if output_file:
        reply = "Document ready (auto-fixed)! Download it on the right."
    else:
        reply = "Auto-retry ran but no file found. Check code panel."
    return _append_turn(history, user_request, reply), output_file, code
# ── User interface
# Left column: conversation and request box. Right column: download slot and
# the code that produced the document. Example prompts sit below both.
with gr.Blocks(title="Document Generator") as demo:
    gr.Markdown(
        "# Document Generation Specialist\n"
        "> Generate professional **PDFs, Excel sheets, PowerPoint decks, and Word docs** -- just describe what you want."
    )

    with gr.Row():
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(label="Conversation", height=450, show_label=True)
            with gr.Row():
                user_input = gr.Textbox(
                    placeholder="E.g. 'Create a Q2 sales report PDF with a revenue bar chart and summary table'",
                    label="What document do you need?",
                    lines=3,
                    scale=4,
                )
                submit_btn = gr.Button(
                    "Generate",
                    variant="primary",
                    scale=1,
                    elem_classes=["generate-btn"],
                )
        with gr.Column(scale=1):
            gr.Markdown("### Download Your Document")
            file_output = gr.File(label="Generated Document")
            gr.Markdown("### Generated Code")
            code_output = gr.Code(
                language="python",
                label="Code used to create your document",
                lines=18,
                interactive=False,
            )

    example_prompts = [
        "Create a simple PDF with title Hello DataFlow in large bold dark blue font centered on the page",
        "Make an Excel file with columns Name, Department, Salary, Start Date and 6 rows. Bold dark blue headers. Freeze top row. Auto-width columns.",
        "Create a Word document titled Project Summary with headings Overview and Next Steps, 2 paragraphs each, and a 3-column table Task/Owner/Status with 4 rows.",
        "Generate a 3-slide PowerPoint for DataFlow Inc. Use slide_layouts[6] and add_textbox() only. Slide 1: dark blue background white title. Slide 2: agenda 4 bullets. Slide 3: thank you.",
        "Create a PDF invoice for DataFlow Inc. Client: Acme Corp. 3 line items totaling $4,300. Include subtotal, 8% tax, and grand total.",
        "Build an Excel expense tracker with Month, Category, Description, Amount columns and 8 rows. Add totals row.",
        "Create a 3-page PDF for DataFlow Inc. Page 1: cover via draw_cover callback. Page 2: executive summary + revenue table Jan-Jun. Page 3: SafeBarChart. Use build_doc() with draw_cover as onFirstPage. Start elements with PageBreak().",
    ]
    gr.Examples(examples=example_prompts, inputs=user_input, label="Try These Examples")

    history_state = gr.State([])

    def run(msg, hist):
        """Forward the request and stored history to generate_document()."""
        return generate_document(msg, hist)

    # The button click and pressing Enter in the textbox do the same thing:
    # generate, then clear the request box.
    for trigger in (submit_btn.click, user_input.submit):
        trigger(
            fn=run,
            inputs=[user_input, history_state],
            outputs=[chatbot, file_output, code_output],
        ).then(lambda: "", outputs=user_input)

    # Copy the chatbot's value into the state whenever it changes, so the next
    # request receives the conversation so far.
    chatbot.change(fn=lambda x: x, inputs=chatbot, outputs=history_state)

demo.launch(
    theme=gr.themes.Soft(primary_hue="blue"),
    css="footer { display: none !important; } .generate-btn { min-height: 80px !important; }"
)