Spaces:

protae5544
/

pdfk

Sleeping

App Files Files Community

pdfk / app.py

protae5544

Update app.py

31f4117 verified 7 months ago

raw

history blame contribute delete

31.5 kB

	# -*- coding: utf-8 -*-

	# ==============================================================================
	# PDF Form Filler & Template Generator with AI Enhancements
	#
	# คำอธิบาย:
	# - เครื่องมือนี้สร้างขึ้นด้วย Gradio สำหรับจัดการไฟล์ PDF และ CSV
	# - ความสามารถหลัก:
	# 1. สร้าง Template (CSV/JSON) จากฟอร์ม PDF ที่มีอยู่
	# 2. เติมข้อมูลจากไฟล์ CSV ลงในฟอร์ม PDF ทีละหลายๆ ไฟล์
	# 3. หาก PDF ไม่มีฟอร์ม จะสร้าง PDF ใหม่จากข้อมูลในแต่ละแถวของ CSV
	# 4. (ทางเลือก) ใช้ AI และ OCR เพื่อแปลงข้อมูลจากรูปภาพเป็น CSV
	# 5. (ทางเลือก) ใช้ AI ช่วยแนะนำการจับคู่คอลัมน์ CSV กับช่องใน PDF
	# 6. (ทางเลือก) ใช้ AI ช่วยตรวจสอบและทำความสะอาดข้อมูลก่อนสร้าง PDF
	#
	# การติดตั้ง Dependencies:
	# - pip install gradio pandas PyPDF2 reportlab
	# - สำหรับฟีเจอร์ AI/OCR (ทางเลือก):
	# - pip install Pillow numpy opencv-python pytesseract
	# - ต้องติดตั้ง Tesseract OCR Engine ในระบบของคุณและตั้งค่า PATH ให้ถูกต้อง
	# - https://github.com/tesseract-ocr/tesseract
	# - สำหรับฟีเจอร์ SambaNova AI (ทางเลือก):
	# - pip install 'gradio_client>=0.12.0'
	#
	# ==============================================================================

	import gradio as gr
	import pandas as pd
	import json
	import io
	import zipfile
	from datetime import datetime
	import traceback
	import tempfile
	import os
	import sys
	import subprocess

	# --- Verify the core dependencies, installing them on first run if missing ---
	try:
	    from PyPDF2 import PdfReader, PdfWriter
	    from reportlab.pdfgen import canvas
	    from reportlab.lib.pagesizes import letter
	    from reportlab.pdfbase import pdfmetrics
	    from reportlab.pdfbase.ttfonts import TTFont
	    print("Dependencies หลักถูกติดตั้งเรียบร้อยแล้ว")
	except ImportError:
	    # Best-effort bootstrap: install the packages with the running
	    # interpreter's pip, then retry the imports.
	    print("กำลังติดตั้ง dependencies ที่จำเป็น: PyPDF2, reportlab, pandas")
	    subprocess.check_call([sys.executable, "-m", "pip", "install", "PyPDF2", "reportlab", "pandas"])
	    from PyPDF2 import PdfReader, PdfWriter
	    from reportlab.pdfgen import canvas
	    from reportlab.lib.pagesizes import letter
	    # These two names are used by create_simple_pdf; the retry must bind
	    # them as well, otherwise they stay undefined on this code path.
	    from reportlab.pdfbase import pdfmetrics
	    from reportlab.pdfbase.ttfonts import TTFont

	# --- Optional AI/OCR dependencies: their absence only disables those features ---
	try:
	    from PIL import Image
	    import numpy as np
	    import cv2
	    import pytesseract
	except ImportError:
	    # At least one optional package is missing: switch the feature off and
	    # tell the operator how to enable it.
	    AI_OCR_ENABLED = False
	    print(
	        "คำเตือน: ไม่พบ Dependencies สำหรับ AI/OCR (Pillow, numpy, opencv-python, pytesseract)",
	        "ฟังก์ชันที่เกี่ยวกับรูปภาพและ OCR จะไม่สามารถใช้งานได้",
	        "ติดตั้งด้วย: pip install Pillow numpy opencv-python pytesseract และติดตั้ง Tesseract engine",
	        sep="\n",
	    )
	else:
	    AI_OCR_ENABLED = True
	    print("Dependencies สำหรับ AI/OCR พร้อมใช้งาน")

	# Optional client for the hosted AI model; the flag records whether it imported.
	try:
	    from gradio_client import Client
	except ImportError:
	    SAMBANOVA_AI_ENABLED = False
	    print(
	        "คำเตือน: ไม่พบ Gradio Client (pip install 'gradio_client>=0.12.0')",
	        "ฟังก์ชันที่ต้องใช้ AI Model จะไม่สามารถใช้งานได้",
	        sep="\n",
	    )
	else:
	    SAMBANOVA_AI_ENABLED = True
	    print("Dependencies สำหรับ SambaNova AI พร้อมใช้งาน")


	# ==============================================================================
	# ส่วนของฟังก์ชันหลัก (Core Functions)
	# ==============================================================================

	def analyze_pdf_fields(pdf_path):
	    """Collect the form fields of a PDF into a dictionary.

	    Fields listed under the catalog's ``/AcroForm`` entry are read first; the
	    widget annotations of every page are then scanned and added only when
	    their name has not been recorded yet.

	    Args:
	        pdf_path: Value handed to ``PdfReader`` to open the document.

	    Returns:
	        A dict mapping each field name to a dict with ``type``,
	        ``default_value`` and ``method`` (plus a 1-based ``page`` for entries
	        found through annotations). If anything raises, ``{"error": message}``
	        is returned instead.
	    """

	    def _describe(obj, fallback_type):
	        # Field type and current value as text; the value loses any
	        # surrounding parentheses.
	        return {
	            'type': str(obj.get("/FT", fallback_type)),
	            'default_value': str(obj.get("/V", "")).strip("()"),
	        }

	    try:
	        pdf = PdfReader(pdf_path)
	        found = {}

	        # Pass 1: fields declared in the catalog-level AcroForm.
	        trailer = pdf.trailer
	        if trailer.get("/Root") and trailer["/Root"].get("/AcroForm"):
	            form = trailer["/Root"]["/AcroForm"]
	            if "/Fields" in form:
	                for ref in form["/Fields"]:
	                    node = ref.get_object()
	                    if "/T" not in node:
	                        continue
	                    name = str(node["/T"]).strip("()")
	                    found[name] = {**_describe(node, "Unknown"), 'method': 'AcroForm'}

	        # Pass 2: widget annotations on each page, skipping known names.
	        for page_no, page in enumerate(pdf.pages, start=1):
	            if "/Annots" not in page:
	                continue
	            for ref in page["/Annots"]:
	                widget = ref.get_object()
	                if widget.get("/Subtype") != "/Widget" or "/T" not in widget:
	                    continue
	                name = str(widget["/T"]).strip("()")
	                if name in found:
	                    continue
	                found[name] = {
	                    **_describe(widget, "Widget"),
	                    'page': page_no,
	                    'method': 'Annotation',
	                }
	        return found
	    except Exception as exc:
	        return {"error": str(exc)}

	def generate_csv_template(pdf_fields, num_rows=5):
	    """Build a sample CSV template (as a DataFrame) from PDF field names.

	    Args:
	        pdf_fields: Mapping whose keys are PDF field names. An empty mapping,
	            or one containing the key ``"error"``, is rejected.
	        num_rows: Number of sample rows to generate; coerced with ``int`` so a
	            float count is accepted.

	    Returns:
	        ``(DataFrame, message)`` on success, ``(None, message)`` otherwise.
	        The frame has an ``id`` column (1..num_rows) followed by one column
	        per non-blank, whitespace-stripped field name.
	    """
	    if not pdf_fields or "error" in pdf_fields:
	        return None, "ไม่สามารถสร้าง CSV template ได้"

	    row_ids = list(range(1, int(num_rows) + 1))
	    template_data = {'id': row_ids}
	    for field_name in pdf_fields:
	        clean_name = field_name.strip() if field_name else ""
	        if not clean_name:
	            continue
	        # Interpolate directly: running str.format over a string that embeds
	        # the field name would fail for names containing '{' or '}'.
	        template_data[clean_name] = [f"ข้อมูลสำหรับ {clean_name} {i}" for i in row_ids]

	    return pd.DataFrame(template_data), "สร้าง CSV template สำเร็จ"

	def generate_json_template(pdf_fields):
	    """Build a JSON-serialisable template describing the PDF fields.

	    Args:
	        pdf_fields: Mapping of field name to field info. An empty mapping, or
	            one containing the key ``"error"``, is rejected.

	    Returns:
	        ``(template, message)`` on success, ``(None, message)`` otherwise.
	        ``template`` holds ``pdf_info`` (field count and generation time),
	        ``fields`` (non-blank names, stripped, mapped to their info) and
	        ``sample_data`` (three sample records).
	    """
	    if not pdf_fields or "error" in pdf_fields:
	        return None, "ไม่สามารถสร้าง JSON template ได้"

	    generated_at = datetime.now().isoformat()

	    # Keep only fields that still have a name once whitespace is removed.
	    named_fields = {
	        name.strip(): info
	        for name, info in pdf_fields.items()
	        if name and name.strip()
	    }

	    # Three sample records, each with an id plus one placeholder per field.
	    samples = [
	        {"id": n, **{name: f"ข้อมูลตัวอย่าง {n}" for name in named_fields}}
	        for n in range(1, 4)
	    ]

	    template = {
	        "pdf_info": {"total_fields": len(pdf_fields), "generation_time": generated_at},
	        "fields": named_fields,
	        "sample_data": samples,
	    }
	    return template, "สร้าง JSON template สำเร็จ"

	def create_template_files(pdf_file, num_rows, progress=gr.Progress()):
	    """Create the template files (CSV, JSON, README) and bundle them in a ZIP.

	    Args:
	        pdf_file: Uploaded file object whose ``.name`` is the PDF path, or None.
	        num_rows: Number of sample rows for the CSV template.
	        progress: Gradio progress callback.

	    Returns:
	        ``(zip_path, message)`` on success, ``(None, message)`` on failure.
	        The ZIP is written to a uniquely named file in the temp directory.
	    """
	    if pdf_file is None:
	        return None, "❌ กรุณาอัพโหลดไฟล์ PDF ก่อน"

	    progress(0, desc="กำลังวิเคราะห์ PDF...")
	    try:
	        pdf_fields = analyze_pdf_fields(pdf_file.name)
	        if not pdf_fields or "error" in pdf_fields:
	            return None, "❌ ไม่พบ Form Fields ใน PDF หรือไฟล์เสียหาย"

	        # The row count feeds range(); make sure it is an integer.
	        num_rows = int(num_rows)

	        progress(0.3, desc="กำลังสร้าง CSV template...")
	        csv_df, _ = generate_csv_template(pdf_fields, num_rows)

	        progress(0.6, desc="กำลังสร้าง JSON template...")
	        json_template, _ = generate_json_template(pdf_fields)

	        if csv_df is None or json_template is None:
	            return None, "❌ ไม่สามารถสร้างไฟล์ template ได้"

	        progress(0.8, desc="กำลังบีบอัดไฟล์เป็น ZIP...")
	        now = datetime.now()
	        timestamp = now.strftime("%Y%m%d_%H%M%S")

	        # to_csv ignores `encoding` when it returns/writes text, so encode
	        # explicitly to get the UTF-8 BOM into the archived file.
	        csv_bytes = csv_df.to_csv(index=False).encode('utf-8-sig')
	        json_str = json.dumps(json_template, ensure_ascii=False, indent=2)
	        readme_content = "\n".join([
	            "# PDF Form Template Files",
	            f"Generated on: {now.strftime('%Y-%m-%d %H:%M:%S')}",
	            f"PDF Fields Found: {len(pdf_fields)}",
	            *[f"- {name}" for name in pdf_fields],
	            "",
	        ])

	        zip_buffer = io.BytesIO()
	        with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_f:
	            zip_f.writestr(f"template_{timestamp}.csv", csv_bytes)
	            zip_f.writestr(f"template_{timestamp}.json", json_str)
	            zip_f.writestr("README.txt", readme_content)

	        # Persist the archive so Gradio can serve it. mkstemp gives every
	        # request its own file, even when two land within the same second.
	        fd, temp_zip_path = tempfile.mkstemp(
	            prefix=f"pdf_templates_{timestamp}_", suffix=".zip"
	        )
	        with os.fdopen(fd, "wb") as f:
	            f.write(zip_buffer.getvalue())

	        progress(1, desc="สร้างไฟล์สำเร็จ!")
	        result_msg = f"✅ สร้าง template สำเร็จ!\n- พบ {len(pdf_fields)} fields\n- CSV มี {num_rows} แถวตัวอย่าง"

	        return temp_zip_path, result_msg

	    except Exception as e:
	        return None, f"❌ เกิดข้อผิดพลาด: {e}\n{traceback.format_exc()}"

	def fill_pdf_form(pdf_path, field_data):
	    """Fill the form fields of a PDF and return the result as bytes.

	    Args:
	        pdf_path: Path of the source PDF, handed to ``PdfReader``.
	        field_data: Mapping of field name to value. Values are converted to
	            text; missing values (None/NaN) become an empty string so they
	            are not written as the literal text "nan".

	    Returns:
	        The bytes of the filled PDF.
	    """

	    def _as_text(value):
	        # pd.isna may return a non-scalar or raise for unusual inputs;
	        # treat those as "not missing".
	        try:
	            missing = bool(pd.isna(value))
	        except (TypeError, ValueError):
	            missing = False
	        return "" if missing else str(value)

	    reader = PdfReader(pdf_path)
	    writer = PdfWriter()
	    writer.append_pages_from_reader(reader)

	    values = {name: _as_text(value) for name, value in field_data.items()}

	    # Fill every page. Only the (page, fields) arguments are passed: an
	    # unsupported keyword would raise TypeError on each page and, once
	    # swallowed, leave every page unfilled.
	    for page_number, page in enumerate(writer.pages, start=1):
	        try:
	            writer.update_page_form_field_values(page, values)
	        except Exception as exc:
	            # Best effort: a page that cannot be updated is left unchanged.
	            print(f"fill_pdf_form: page {page_number} was not updated: {exc}")

	    output_buffer = io.BytesIO()
	    writer.write(output_buffer)
	    return output_buffer.getvalue()

	def create_simple_pdf(data_row, filename):
	    """Render one data row as a simple PDF (used when there is no form to fill).

	    Args:
	        data_row: Mapping of column name to value; empty/NaN values are skipped.
	        filename: Output file name; shown in the title without a trailing
	            ``.pdf``.

	    Returns:
	        The bytes of the generated PDF.
	    """
	    buffer = io.BytesIO()

	    # Prefer the Thai-capable font. Reuse it when it is already registered
	    # (e.g. at startup from another path); only then try the local file.
	    font_name = 'THSarabunNew'
	    try:
	        if font_name not in pdfmetrics.getRegisteredFontNames():
	            pdfmetrics.registerFont(TTFont(font_name, 'THSarabunNew.ttf'))
	    except Exception:
	        font_name = 'Helvetica'  # Fallback

	    p = canvas.Canvas(buffer, pagesize=letter)
	    width, height = letter

	    # Drop only a trailing extension, not every ".pdf" inside the name.
	    title = filename[:-4] if filename.endswith('.pdf') else filename

	    p.setFont(font_name, 16)
	    p.drawString(50, height - 50, f"เอกสาร: {title}")
	    p.line(50, height - 60, 550, height - 60)

	    y_position = height - 90
	    p.setFont(font_name, 12)
	    for column, value in data_row.items():
	        if not (pd.notna(value) and str(value).strip()):
	            continue
	        text = f"{str(column).strip()}: {str(value).strip()}"
	        try:
	            p.drawString(50, y_position, text)
	        except Exception:
	            # Last resort when the font cannot draw the text: replace
	            # characters that latin-1 cannot represent.
	            safe_text = text.encode('latin-1', 'replace').decode('latin-1')
	            p.drawString(50, y_position, safe_text)
	        y_position -= 20
	        if y_position < 50:
	            # Out of vertical space: start a new page and restore the font.
	            p.showPage()
	            p.setFont(font_name, 12)
	            y_position = height - 50

	    p.save()
	    return buffer.getvalue()

	def read_csv_safe(csv_file):
	    """Read a CSV file, trying several encodings and separators.

	    The first encoding/separator pair that yields more than one column wins.
	    If no pair does, the first single-column parse is returned, so a
	    genuinely one-column CSV is still readable.

	    Args:
	        csv_file: Object with the path in ``.name`` (as passed by the file
	            upload component) or the path itself.

	    Returns:
	        ``(DataFrame, None)`` on success, ``(None, error_message)`` otherwise.
	    """
	    encodings = ['utf-8-sig', 'utf-8', 'cp874', 'tis-620']
	    separators = [',', ';', '\t']

	    filepath = getattr(csv_file, "name", csv_file)

	    single_column = None
	    for encoding in encodings:
	        for sep in separators:
	            try:
	                df = pd.read_csv(filepath, encoding=encoding, sep=sep, engine='python')
	            except Exception:
	                # Wrong encoding/separator guess (or unreadable file): try the next.
	                continue
	            if len(df.columns) > 1:
	                return df, None
	            if single_column is None and len(df.columns) == 1:
	                single_column = df

	    if single_column is not None:
	        return single_column, None
	    return None, "ไม่สามารถอ่านไฟล์ CSV ได้ ลองตรวจสอบ Encoding (ควรเป็น UTF-8) และ Separator (ควรเป็น ,)"

	def process_pdf_csv(pdf_file, csv_file, filename_column, file_prefix, use_form_fields, progress=gr.Progress()):
	    """Generate one PDF per CSV row and return them bundled in a ZIP.

	    Args:
	        pdf_file: Uploaded PDF (path in ``.name``).
	        csv_file: Uploaded CSV (path in ``.name``).
	        filename_column: Optional CSV column whose value names each output file.
	        file_prefix: Prefix for every generated file name.
	        use_form_fields: When true and the PDF has form fields, fill the form;
	            otherwise a simple PDF is generated from the row.
	        progress: Gradio progress callback.

	    Returns:
	        ``(zip_path, message)`` on success, ``(None, message)`` on failure.
	    """
	    import unicodedata

	    if not pdf_file or not csv_file:
	        return None, "❌ กรุณาอัพโหลดทั้งไฟล์ PDF และ CSV"

	    def _is_name_char(ch):
	        # Letters/digits, a few separators, and combining marks. The marks
	        # (Unicode category M*) are not alphanumeric, but Thai vowel and tone
	        # signs belong to that category and must survive sanitising.
	        return (
	            ch.isalnum()
	            or ch in (' ', '-', '_')
	            or unicodedata.category(ch).startswith('M')
	        )

	    try:
	        df, csv_error = read_csv_safe(csv_file)
	        if df is None:
	            return None, f"❌ ไม่สามารถอ่าน CSV ได้: {csv_error}"

	        pdf_path = pdf_file.name
	        pdf_fields = analyze_pdf_fields(pdf_path)
	        has_form_fields = bool(pdf_fields and "error" not in pdf_fields)
	        use_name_column = bool(filename_column) and filename_column in df.columns

	        generated_pdfs = {}
	        used_names = set()
	        log = []
	        total_rows = len(df)

	        # Count rows by position: index labels are not guaranteed to be 0..n-1.
	        for position, (_, row) in enumerate(df.iterrows(), start=1):
	            progress(position / total_rows, f"ประมวลผลแถวที่ {position}/{total_rows}")

	            # Build the file name, falling back to the row number when the
	            # chosen column is empty or sanitises to nothing.
	            safe_name = ""
	            if use_name_column and pd.notna(row[filename_column]):
	                safe_name = "".join(
	                    c for c in str(row[filename_column]) if _is_name_char(c)
	                ).strip()
	            stem = f"{file_prefix}_{safe_name}" if safe_name else f"{file_prefix}_{position:03d}"

	            # Two rows may map to the same name; add a counter so a later
	            # row never overwrites an earlier one.
	            filename = f"{stem}.pdf"
	            duplicate = 1
	            while filename in used_names:
	                duplicate += 1
	                filename = f"{stem}_{duplicate}.pdf"
	            used_names.add(filename)

	            row_data = row.to_dict()

	            try:
	                if use_form_fields and has_form_fields:
	                    # Fill the existing PDF form.
	                    pdf_content = fill_pdf_form(pdf_path, row_data)
	                    status = "เติมฟอร์มสำเร็จ"
	                else:
	                    # Generate a new PDF from the row.
	                    pdf_content = create_simple_pdf(row_data, filename)
	                    status = "สร้าง PDF ใหม่" if not has_form_fields else "สร้าง PDF ใหม่ (Fallback)"

	                generated_pdfs[filename] = pdf_content
	                log.append(f"✅ {filename}: {status}")
	            except Exception as e:
	                log.append(f"❌ {filename}: เกิดข้อผิดพลาด - {e}")

	        if not generated_pdfs:
	            return None, "❌ ไม่สามารถสร้าง PDF ได้เลย\n" + "\n".join(log)

	        # Build the ZIP in memory.
	        zip_buffer = io.BytesIO()
	        with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_f:
	            for filename, pdf_content in generated_pdfs.items():
	                zip_f.writestr(filename, pdf_content)
	            zip_f.writestr("processing_log.txt", "\n".join(log))

	        # mkstemp gives each request its own file, so concurrent runs in the
	        # same second cannot clobber each other.
	        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
	        fd, temp_zip_path = tempfile.mkstemp(
	            prefix=f"generated_pdfs_{timestamp}_", suffix=".zip"
	        )
	        with os.fdopen(fd, 'wb') as f:
	            f.write(zip_buffer.getvalue())

	        result_message = f"✅ สร้าง PDF สำเร็จ {len(generated_pdfs)} ไฟล์!\nดูรายละเอียดใน processing_log.txt"
	        return temp_zip_path, result_message

	    except Exception as e:
	        return None, f"❌ เกิดข้อผิดพลาดร้ายแรง: {e}\n{traceback.format_exc()}"


	# ==============================================================================
	# ส่วนของฟังก์ชัน AI และ OCR (ทางเลือก)
	# ==============================================================================

	def init_sambanova_ai():
	    """Initialize the SambaNova AI model client.

	    The Hugging Face token is read from the ``HF_TOKEN`` environment variable;
	    when it is unset the client is created without a token, instead of
	    sending a placeholder string as a credential.

	    Returns:
	        A ``gradio_client.Client`` instance, or None when the client library
	        is unavailable or the connection fails.
	    """
	    if not SAMBANOVA_AI_ENABLED:
	        print("SambaNova AI is disabled.")
	        return None
	    try:
	        # gradio_client.Client is used instead of gr.load, which may be problematic.
	        hf_token = os.environ.get("HF_TOKEN") or None
	        client = Client("sambanova/Llama-3-8B-Instruct", hf_token=hf_token)
	        print("SambaNova AI client initialized successfully.")
	        return client
	    except Exception as e:
	        print(f"Error initializing SambaNova AI: {e}")
	        return None

	def extract_text_from_image(image_file):
	    """Extract text from an image file using Tesseract OCR.

	    Args:
	        image_file: Object with the image path in ``.name``, or None.

	    Returns:
	        ``(text, status_message)``; ``text`` is empty when OCR is unavailable,
	        no image was given, or an error occurred.
	    """
	    if not AI_OCR_ENABLED or image_file is None:
	        return "", "OCR is not available or no image provided."
	    try:
	        # Configure Tesseract to detect both Thai and English.
	        custom_config = r'--oem 3 --psm 6 -l tha+eng'
	        # The context manager closes the image file handle once OCR is done.
	        with Image.open(image_file.name) as image:
	            text = pytesseract.image_to_string(image, config=custom_config)
	        return text.strip(), "Text extracted successfully."
	    except Exception as e:
	        return "", f"OCR Error: {e}. ตรวจสอบว่าติดตั้ง Tesseract Engine ถูกต้อง"

	def image_to_csv_with_ai(image_file, progress=gr.Progress()):
	    """Convert data from an image to a CSV file using OCR and AI for structuring.

	    Args:
	        image_file: Uploaded image (path in ``.name``), or None.
	        progress: Gradio progress callback.

	    Returns:
	        ``(csv_path, message)`` on success, ``(None, message)`` otherwise. The
	        CSV has one column per extracted key and a single row of values.
	    """
	    if not AI_OCR_ENABLED:
	        return None, "❌ ฟังก์ชันนี้ต้องการ AI/OCR dependencies"
	    if image_file is None:
	        return None, "❌ กรุณาอัพโหลดรูปภาพ"

	    progress(0.2, desc="กำลังอ่านข้อความจากรูปภาพ (OCR)...")
	    raw_text, ocr_status = extract_text_from_image(image_file)
	    if not raw_text:
	        return None, f"❌ ไม่พบข้อความในรูปภาพ: {ocr_status}"

	    progress(0.5, desc="กำลังใช้ AI จัดโครงสร้างข้อมูล...")
	    ai_client = init_sambanova_ai()
	    if not ai_client:
	        return None, "❌ ไม่สามารถเชื่อมต่อ AI Model ได้"

	    prompt = "\n".join([
	        "",
	        "From the following text, extract key-value pairs. The output should be only the data in 'key: value' format, one per line.",
	        "Example:",
	        "Name: John Doe",
	        "Address: 123 Main St",
	        "Date: 2024-01-15",
	        "",
	        "Text to process:",
	        "---",
	        f"{raw_text}",
	        "---",
	        "",
	    ])
	    try:
	        # Call the model through the gradio_client API.
	        result = ai_client.predict(message=prompt, api_name="/chat")

	        progress(0.8, desc="กำลังสร้างไฟล์ CSV...")
	        # Parse 'key: value' lines, trimming the whitespace around both
	        # parts so headers and cells carry no stray leading space.
	        data = []
	        for line in result.strip().split('\n'):
	            if ':' not in line:
	                continue
	            key, value = (part.strip() for part in line.split(':', 1))
	            if key:
	                data.append((key, value))

	        if not data:
	            return None, "AI ไม่สามารถจัดโครงสร้างข้อมูลได้"

	        df = pd.DataFrame(data, columns=['Field', 'Value']).set_index('Field').T

	        # Unique temp file per request; newline='' lets the CSV writer
	        # control line endings so they are not translated twice.
	        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
	        fd, temp_csv_path = tempfile.mkstemp(
	            prefix=f"extracted_data_{timestamp}_", suffix=".csv"
	        )
	        with os.fdopen(fd, 'w', encoding='utf-8-sig', newline='') as f:
	            df.to_csv(f, index=False)

	        return temp_csv_path, "✅ แปลงรูปภาพเป็น CSV สำเร็จ"
	    except Exception as e:
	        return None, f"❌ เกิดข้อผิดพลาดระหว่างประมวลผลด้วย AI: {e}"


	# ==============================================================================
	# ส่วนของ UI Analysis Functions
	# ==============================================================================

	def analyze_pdf_info(pdf_file):
	    """Summarise an uploaded PDF as text for display in the UI.

	    Args:
	        pdf_file: Uploaded file object (path in ``.name``), or None.

	    Returns:
	        A string with the page count and either up to ten field names or a
	        note that no form fields were found; an error string if reading fails.
	    """
	    if pdf_file is None:
	        return "ยังไม่มีไฟล์ PDF"
	    try:
	        page_count = len(PdfReader(pdf_file.name).pages)
	        parts = [f"📄 ข้อมูล PDF:\n- จำนวนหน้า: {page_count}\n"]

	        fields = analyze_pdf_fields(pdf_file.name)
	        if not fields or "error" in fields:
	            parts.append("- ไม่พบ Form Fields (จะใช้วิธีสร้าง PDF ใหม่ทับลงบนกระดาษเปล่า)\n")
	        else:
	            names = list(fields.keys())
	            parts.append(f"- พบ Form Fields: {len(names)} ช่อง (จะใช้วิธีเติมฟอร์ม)\n")
	            parts.append("\n🏷️ ตัวอย่างชื่อ Fields:\n")
	            # Show only the first ten names, then a count of the rest.
	            parts.extend(f" - `{name}`\n" for name in names[:10])
	            hidden = len(names) - 10
	            if hidden > 0:
	                parts.append(f" - ... และอีก {hidden} fields\n")
	        return "".join(parts)
	    except Exception as e:
	        return f"❌ ไม่สามารถวิเคราะห์ PDF: {e}"

	def analyze_csv_info(csv_file):
	    """Summarise an uploaded CSV and refresh the filename-column dropdown.

	    Args:
	        csv_file: Uploaded file object, or None.

	    Returns:
	        ``(summary_text, dropdown_update)``. The dropdown receives the CSV's
	        column names as choices, or an empty list when nothing could be read.
	    """
	    if csv_file is None:
	        return "ยังไม่มีไฟล์ CSV", gr.update(choices=[], value=None)
	    try:
	        df, error = read_csv_safe(csv_file)
	        if df is None:
	            return f"❌ ไม่สามารถอ่าน CSV: {error}", gr.update(choices=[], value=None)

	        columns = df.columns.tolist()
	        lines = [
	            f"📋 ข้อมูล CSV:\n- จำนวนแถว: {len(df)}\n- จำนวนคอลัมน์: {len(columns)}\n",
	            "\n📝 รายชื่อคอลัมน์:\n",
	        ]
	        # List at most fifteen columns, then a count of the remainder.
	        lines.extend(f" - `{col}`\n" for col in columns[:15])
	        remaining = len(columns) - 15
	        if remaining > 0:
	            lines.append(f" - ... และอีก {remaining} คอลัมน์\n")

	        # Offer every column as a candidate for the output file name.
	        return "".join(lines), gr.update(choices=columns, value=None)
	    except Exception as e:
	        return f"❌ ไม่สามารถวิเคราะห์ CSV: {e}", gr.update(choices=[], value=None)


	# ==============================================================================
	# ส่วนของการสร้าง Gradio Interface
	# ==============================================================================

	def create_interface():
	    """Build the Gradio Blocks UI and wire its event handlers.

	    The layout has three tabs: template generation, PDF filling from CSV, and
	    image-to-CSV conversion. Returns the ``gr.Blocks`` app (not yet launched).
	    """
	    with gr.Blocks(title="PDF Form Filler & Template Generator", theme=gr.themes.Soft()) as app:
	        gr.Markdown("# 📄 เครื่องมือจัดการ PDF จากข้อมูล CSV")
	        gr.Markdown("รองรับการ สร้าง Template จาก PDF, เติมข้อมูล จาก CSV, และ แปลงรูปภาพเป็น CSV ด้วย AI")

	        with gr.Tabs():
	            # --- Tab 1: generate a template ---
	            with gr.TabItem("🔄 1. สร้าง Template"):
	                gr.Markdown("## สร้าง CSV/JSON Template จาก PDF ที่มี Form Fields")
	                with gr.Row():
	                    with gr.Column(scale=1):
	                        template_pdf = gr.File(label="📄 อัพโหลด PDF ต้นฉบับ", file_types=[".pdf"])
	                        num_sample_rows = gr.Slider(label="จำนวนแถวตัวอย่างใน CSV", minimum=1, maximum=50, value=5, step=1)
	                        generate_template_btn = gr.Button("🚀 สร้าง Template", variant="primary")
	                    with gr.Column(scale=2):
	                        template_pdf_info = gr.Markdown("อัพโหลด PDF เพื่อดูข้อมูล...")
	                        template_result_file = gr.File(label="📦 ดาวน์โหลดไฟล์ Template (ZIP)", interactive=False)
	                        template_result_message = gr.Markdown()

	            # --- Tab 2: fill a PDF from CSV data ---
	            with gr.TabItem("📝 2. เติมข้อมูล PDF"):
	                gr.Markdown("## เติมข้อมูลลงใน PDF จากไฟล์ CSV")
	                with gr.Row():
	                    with gr.Column(scale=1):
	                        gr.Markdown("### 📂 1. อัพโหลดไฟล์")
	                        pdf_file = gr.File(label="📄 PDF Form ต้นฉบับ", file_types=[".pdf"])
	                        csv_file = gr.File(label="📊 CSV ข้อมูล", file_types=[".csv"])

	                        gr.Markdown("### ⚙️ 2. ตั้งค่า")
	                        use_form_fields = gr.Checkbox(label="พยายามเติมข้อมูลลงใน Form Fields ที่มีอยู่", value=True)
	                        file_prefix = gr.Textbox(label="คำนำหน้าชื่อไฟล์ (Prefix)", value="Document")
	                        filename_column = gr.Dropdown(label="เลือกคอลัมน์ที่จะใช้เป็นชื่อไฟล์ (ถ้ามี)", interactive=True)

	                        fill_form_btn = gr.Button("🚀 เริ่มเติมข้อมูล", variant="primary")

	                    with gr.Column(scale=2):
	                        pdf_info = gr.Markdown("อัพโหลด PDF เพื่อดูข้อมูล...")
	                        csv_info = gr.Markdown("อัพโหลด CSV เพื่อดูข้อมูล...")
	                        gr.Markdown("---")
	                        filled_result_file = gr.File(label="📦 ดาวน์โหลด PDF ทั้งหมด (ZIP)", interactive=False)
	                        filled_result_message = gr.Markdown()

	            # --- Tab 3: Image to CSV (AI) ---
	            with gr.TabItem("🖼️ 3. แปลงรูปภาพเป็น CSV (AI)"):
	                gr.Markdown("## ใช้ OCR และ AI เพื่อดึงข้อมูลจากรูปภาพและสร้างเป็นไฟล์ CSV")
	                with gr.Row():
	                    with gr.Column(scale=1):
	                        image_upload = gr.File(label="🖼️ อัพโหลดรูปภาพ (บิล, เอกสาร, ฯลฯ)", file_types=["image"])
	                        # The button is hidden when the OCR dependencies are missing.
	                        image_to_csv_btn = gr.Button("🤖 แปลงเป็น CSV", variant="primary", visible=AI_OCR_ENABLED)
	                        if not AI_OCR_ENABLED:
	                            gr.Markdown("⚠️ ฟังก์ชันนี้ถูกปิดใช้งานเนื่องจากไม่พบ Library ที่จำเป็น (Pillow, OpenCV, Pytesseract)")

	                    with gr.Column(scale=2):
	                        image_csv_output = gr.File(label="📄 ดาวน์โหลดไฟล์ CSV ที่ได้", interactive=False)
	                        image_csv_message = gr.Markdown()

	        # --- Event Handlers ---
	        # Tab 1: show the PDF summary on upload; build the template ZIP on click.
	        template_pdf.change(fn=analyze_pdf_info, inputs=template_pdf, outputs=template_pdf_info)
	        generate_template_btn.click(
	            fn=create_template_files,
	            inputs=[template_pdf, num_sample_rows],
	            outputs=[template_result_file, template_result_message]
	        )

	        # Tab 2: show summaries on upload; the CSV handler also updates the
	        # filename-column dropdown.
	        pdf_file.change(fn=analyze_pdf_info, inputs=pdf_file, outputs=pdf_info)
	        csv_file.change(fn=analyze_csv_info, inputs=csv_file, outputs=[csv_info, filename_column])

	        fill_form_btn.click(
	            fn=process_pdf_csv,
	            inputs=[pdf_file, csv_file, filename_column, file_prefix, use_form_fields],
	            outputs=[filled_result_file, filled_result_message]
	        )

	        # Tab 3: the handler is registered only when the OCR dependencies imported.
	        if AI_OCR_ENABLED:
	            image_to_csv_btn.click(
	                fn=image_to_csv_with_ai,
	                inputs=[image_upload],
	                outputs=[image_csv_output, image_csv_message]
	            )

	    return app

	# --- Launch the application ---
	if __name__ == "__main__":
	    # Register a Thai-capable font up front so PDF generation does not fail
	    # later. The local file is tried first (the same name create_simple_pdf
	    # looks for), then the Windows system font path.
	    font_candidates = ('THSarabunNew.ttf', 'C:/Windows/Fonts/THSARI.TTF')
	    for font_path in font_candidates:
	        try:
	            from reportlab.pdfbase import pdfmetrics
	            from reportlab.pdfbase.ttfonts import TTFont
	            pdfmetrics.registerFont(TTFont('THSarabunNew', font_path))
	        except Exception:
	            # Font file missing or unreadable at this location: try the next.
	            continue
	        print("ลงทะเบียน Font 'THSarabunNew' สำหรับ ReportLab สำเร็จ")
	        break
	    else:
	        print("คำเตือน: ไม่พบ Font 'THSarabunNew' ในระบบ อาจทำให้การสร้าง PDF ภาษาไทยมีปัญหา")
	        print("แนะนำให้ติดตั้งฟอนต์ TH SarabunPSK หรือปรับแก้ path ของฟอนต์ในโค้ด")

	    app = create_interface()
	    app.launch(debug=True)