image2tikz / app.py
vermen's picture
Update app.py
0e43e78 verified
import subprocess
import os
from PIL import Image
import requests
##################################
def write_txt(file_name, text):
    """Write ``text`` to ``file_name`` as UTF-8.

    The file is opened in ``"w"`` mode, so any existing content is replaced.

    Args:
        file_name: Path of the file to write.
        text (str): Text to store in the file.
    """
    with open(file_name, mode="w", encoding="utf-8") as out_handle:
        out_handle.write(text)
##################################
def load_txt(file_name):
    """Read ``file_name`` as UTF-8 and return its whole content.

    Args:
        file_name: Path of the file to read.

    Returns:
        str: The complete text of the file.
    """
    with open(file_name, mode="r", encoding="utf-8") as in_handle:
        return in_handle.read()
####
#os.system("apt-get install -y texlive-latex-base texlive-fonts-recommended texlive-latex-extra")
#os.system("apt-get install -y --no-install-recommends texlive-latex-base texlive-latex-recommended texlive-pictures texlive-science texlive-latex-extra poppler-utils")
#print("all latex installed")
####
def extract_document_content(latex_code):
    r"""Extract the text between \begin{document} and \end{document}.

    The closing tag is searched for only *after* the first opening tag, so a
    stray ``\end{document}`` earlier in the input cannot produce a bogus
    (empty) result.

    Args:
        latex_code (str): The LaTeX source to extract from.

    Returns:
        str | None: The content between the first ``\begin{document}`` and
        the next ``\end{document}`` with surrounding whitespace stripped, or
        ``None`` when that pair of tags is not present.
    """
    start_tag = "\\begin{document}"
    end_tag = "\\end{document}"

    # partition() returns an empty separator when the tag is not found.
    _, found_start, after_start = latex_code.partition(start_tag)
    if not found_start:
        return None

    document_content, found_end, _ = after_start.partition(end_tag)
    if not found_end:
        return None

    return document_content.strip()
##################################
from google import genai
from google.genai import types

# SECURITY: the API key must never be committed to source control. It is read
# from the GEMINI_API_KEY environment variable (e.g. a Space/CI secret).
# The key that was previously hard-coded here is public and should be revoked.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")

# Shared Gemini client used by generate_tex().
client = genai.Client(api_key=GEMINI_API_KEY)
def generate_tex(file_path):
    """Ask the Gemini model for tikz-cd LaTeX source matching an image.

    The image is uploaded through the module-level ``client`` and sent to the
    ``gemini-2.0-flash`` model together with a fixed instruction prompt, using
    a temperature of 0.1.

    Args:
        file_path: Path of the image file to upload.

    Returns:
        The ``text`` attribute of the model response.
    """
    print(file_path)
    uploaded_image = client.files.upload(file=file_path)

    # Fixed instructions sent along with the image.
    instructions = (
        "Act as an expert in tikzcd. Make the tikcd code of the image, include packages for special characters.\n"
        "The document class is always standalone, and include: \\usetikzlibrary{decorations.pathmorphing}, \\usepackage{stmaryrd}, \\usepackage{amssymb} after the tikcd library."
        "Do not make any suggestion nor commentaries."
    )
    generation_config = types.GenerateContentConfig(temperature=0.1)

    response = client.models.generate_content(
        model="gemini-2.0-flash",
        contents=[uploaded_image, "\n\n", instructions],
        config=generation_config,
    )
    return response.text
def transform_to_pdf(tex_content, *, verify=False, timeout=120):
    """Compile LaTeX source remotely and save the result as 'latex_code.pdf'.

    The source is POSTed to the LaTeXOnline.cc compile endpoint; on a 200
    response the final response URL is fetched and its body is written to
    ``latex_code.pdf`` in the current working directory. Non-200 responses
    are reported with ``print`` and no file is written.

    Args:
        tex_content (str): Complete LaTeX source to compile.
        verify: Passed to ``requests`` as ``verify``. Defaults to ``False``
            to keep the existing behaviour.
            NOTE(review): ``False`` disables TLS certificate validation;
            pass ``True`` once the endpoint's certificate is confirmed to
            validate in the deployment environment.
        timeout: Seconds to wait for each HTTP request. Without a timeout
            ``requests`` can block forever on an unresponsive server.

    Raises:
        requests.exceptions.RequestException: On connection failures or when
            a request exceeds ``timeout``.
    """
    # API endpoint for LaTeXOnline.cc
    url = "https://www.latexonline.cc/compile"

    # Send LaTeX source for compilation.
    response = requests.post(
        url,
        data={"text": tex_content, "target": "pdf"},
        verify=verify,
        timeout=timeout,
    )
    if response.status_code != 200:
        print("Error:", response.status_code, response.text)
        return

    pdf_url = response.url  # URL the compile request resolved to
    print("PDF compiled successfully! Downloading from:", pdf_url)

    # Download the compiled PDF.
    pdf_response = requests.get(pdf_url, verify=verify, timeout=timeout)
    if pdf_response.status_code != 200:
        print("Error downloading PDF:", pdf_response.status_code)
        return

    with open("latex_code.pdf", "wb") as f:
        f.write(pdf_response.content)
    print("PDF saved as 'latex_code.pdf'.")
def generate_pdf_png(latex_code):
    """Compile LaTeX source to 'latex_code.pdf' and render page 1 as a PNG.

    Markdown code fences are stripped from ``latex_code``, the cleaned source
    is saved as ``latex_code.tex``, compiled through ``transform_to_pdf`` and
    the first page of the resulting PDF is rendered to ``latex_code.png``.
    All files are written to the current working directory.

    Args:
        latex_code (str): LaTeX source, possibly wrapped in Markdown fences.

    Raises:
        RuntimeError: If compilation did not produce ``latex_code.pdf``.
    """
    # Remove Markdown fences; the language-tagged forms go first so that the
    # bare fence replacement does not leave "tex"/"latex" behind.
    for fence in ("```tex", "```latex", "```"):
        latex_code = latex_code.replace(fence, "")
    write_txt("latex_code.tex", latex_code)
    print("file saved")

    # Drop any PDF left over from a previous call: transform_to_pdf only
    # prints on failure, so a stale file would otherwise be rendered and
    # returned as if it belonged to this request.
    try:
        os.remove("latex_code.pdf")
    except FileNotFoundError:
        pass

    transform_to_pdf(latex_code)
    if not os.path.exists("latex_code.pdf"):
        raise RuntimeError(
            "LaTeX compilation failed: 'latex_code.pdf' was not produced"
        )
    print("already have a pdf")

    # Render the first PDF page to an image with PyMuPDF.
    import fitz  # pymupdf

    pdf_document = fitz.open("latex_code.pdf")
    try:
        pix = pdf_document[0].get_pixmap()  # Render first page
        pix.save("latex_code.png")  # Save as PNG
    finally:
        # Release the file handle even if rendering fails.
        pdf_document.close()
    print("Image saved as PNG")
def generate_results(file_path):
    """Run the whole image -> LaTeX -> PNG pipeline for one input image.

    Args:
        file_path: Path of the input image, forwarded to ``generate_tex``.

    Returns:
        tuple: ``("./latex_code.png", body)`` where ``body`` is whatever
        ``extract_document_content`` returns for the generated LaTeX.
    """
    generated_latex = generate_tex(file_path)
    generate_pdf_png(generated_latex)
    document_body = extract_document_content(generated_latex)
    return ("./latex_code.png", document_body)
# Startup marker, printed once the helper functions above are defined.
print("all was initialized\n")

import gradio as gr

# Web UI: one image input (handed to generate_results as a file path) and
# two outputs, the rendered image and the extracted LaTeX body.
app = gr.Interface(
    fn=generate_results,
    inputs=gr.Image(type="filepath"),
    outputs=[
        gr.Image(label="Imagen latex"),
        gr.TextArea(label="Código latex", lines=30),
    ],
)

# Alternative launch that binds to an explicit host/port taken from $PORT:
#   port = int(os.environ.get("PORT", 8080))
#   app.launch(server_name="0.0.0.0", server_port=port, debug=True)
app.launch(share=True)