# Residue from the hosting site's file view (not part of the program):
#   qtpi's picture / Update app.py / 60d2934 verified
import google.generativeai as genai
import os
import gradio as gr
import tempfile
import json
# Look up the Gemini API key in the process environment (None when the
# variable is unset) and register it with the SDK at import time.
api_key = os.environ.get("GEMINI_API_KEY")
genai.configure(api_key=api_key)
# Instruction sent ahead of the schema so the model answers in that shape.
_SCHEMA_INSTRUCTION = "give the answer in the given JSON schema for every page having figure, table or equations accurately.\n"

# Schema describing the per-page figure / equation / table report requested
# from the model. This text is sent verbatim as part of the prompt.
_SCHEMA_PROMPT = '''
[
{
"pageNumber":{
"type": "integer",
"description":"Indicates if the page number of the pdf if present else -1"
},
"Figure": [
{
"figureNumber":{
"type":"integer",
"description":"Indicates the figure number if present else -1"
},
"details":{
"isPresent": {
"type": "bool",
"description": "Indicates if there is a figure present in the image."
},
"inTextDescription": {
"type": "bool",
"description": "Indicates if there is a description of the figure present in the main text other than caption."
},
"captionLocation":{
"type": "enum",
"enum": ["Above", "Below", "None"],
"description": "Indicates the location of the figure caption ('above', 'below', or 'none')."
},
"inCaptionDescription": {
"type": "bool",
"description": "Indicates if the caption description is about the figure."
}
},
"coordinates":{
"type": "Array",
"description": "give the coordinates for the bounding box for this Figure in the order: x, y, height, width"
}
}
],
"Equation": [
{
"equationNumber":{
"type": "integer",
"description":"Indicates if there is a number of the equation if present else -1"
},
"details":{
"isPresent": {
"type": "bool",
"description": "Indicates if there is an equation present in the image."
},
"inTextDescription": {
"type": "bool",
"description": "Indicates if there is a description of the equation present in the main text other than caption."
}
},
"coordinates":{
"type": "Array",
"description": "give the coordinates for the bounding box for this Equation in the order: x, y, height, width"
}
}
],
"Table": [
{
"tableNumber":{
"type":"integer",
"description":"Indicates if there is a number of the table if present else -1"
},
"details":{
"isPresent": {
"type": "bool",
"description": "Indicates if there is a table present in the image."
},
"inTextDescription": {
"type": "bool",
"description": "Indicates if there is a description of the table present in the main text other than caption."
},
"captionLocation": {
"type": "enum",
"enum": ["Above", "Below", "None"],
"description": "Indicates the location of the table caption ('above', 'below', or 'none')."
},
"inCaptionDescription": {
"type": "bool",
"description": "Indicates if the caption description is about the table."
}
},
"coordinates":{
"type": "Array",
"description": "give the coordinates for the bounding box for this Table in the order: x, y, height, width"
}
}
]
}
]
'''


def _strip_code_fence(text):
    """Return *text* without a surrounding Markdown code fence, if it has one."""
    cleaned = text.strip()
    if not cleaned.startswith("```"):
        # Already bare text; nothing to peel off.
        return cleaned

    body = cleaned[3:]
    if body.endswith("```"):
        body = body[:-3]

    # Drop the rest of the opening fence line when it is empty or only a
    # language tag such as "json"; otherwise it already belongs to the payload.
    tag, newline, remainder = body.partition("\n")
    if newline and (not tag.strip() or tag.strip().isalnum()):
        body = remainder
    return body.strip()


def figureTableEqAnalyser(filePath, model_name="gemini-1.5-pro-exp-0827"):
    """
    Ask a Gemini model to report the figures, equations and tables in a file.

    The file is uploaded with ``genai.upload_file`` and sent to the model
    together with the instruction and JSON schema defined above.

    Args:
        filePath: Path of the file to upload (the caller passes a PDF).
        model_name: Identifier of the Gemini model to query. Defaults to the
            model this function has always used.

    Returns:
        The model's reply parsed with ``json.loads``.

    Raises:
        json.JSONDecodeError: If the reply, once any Markdown code fence has
            been removed, is not valid JSON.
    """
    uploaded = genai.upload_file(filePath)
    model = genai.GenerativeModel(model_name=model_name)
    response = model.generate_content([_SCHEMA_INSTRUCTION + _SCHEMA_PROMPT, uploaded])

    # The reply may or may not be wrapped in a ```json fence, so strip the
    # fence by content instead of slicing at fixed character offsets.
    return json.loads(_strip_code_fence(response.text))
# ------------------------------
# Gradio Interface
# ------------------------------
def process_upload(file):
    """
    Analyze an uploaded PDF and return the findings as a JSON string.

    Args:
        file: Raw bytes of the uploaded PDF (the upload widget is configured
            with ``type="binary"``), or None when nothing was uploaded.

    Returns:
        A JSON-formatted string: the analysis results, or an
        ``{"error": ...}`` object when no file was supplied.
    """
    if file is None:
        # A single value is returned because the UI wires a single output.
        return json.dumps({"error": "No file uploaded"}, indent=2)

    # delete=False keeps the file on disk after the with-block closes it.
    # Closing flushes the bytes, so the analyser can re-open the path and
    # read the complete PDF.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_input:
        temp_input.write(file)
        temp_input_path = temp_input.name

    try:
        results = figureTableEqAnalyser(temp_input_path)
    finally:
        # Remove the temporary copy exactly once, even if analysis fails.
        os.unlink(temp_input_path)

    return json.dumps(results, indent=2)
def create_interface():
    """
    Assemble the Gradio Blocks UI and return it without launching.

    The layout is a heading, a PDF upload widget, an "Analyze PDF" button and
    a JSON viewer. Clicking the button passes the uploaded file to
    ``process_upload`` and shows its return value in the viewer.
    """
    with gr.Blocks(title="PDF Analyzer") as demo:
        gr.Markdown("# PDF Analyzer")
        gr.Markdown("Upload a PDF document to analyze its structure, references, language, and more.")

        with gr.Row():
            pdf_upload = gr.File(label="Upload PDF", file_types=[".pdf"], type="binary")

        with gr.Row():
            run_button = gr.Button("Analyze PDF")

        with gr.Row():
            json_view = gr.JSON(label="Analysis Results", show_label=True)

        # Wire the button: uploaded file in, analysis JSON out.
        run_button.click(fn=process_upload, inputs=[pdf_upload], outputs=[json_view])

    return demo
if __name__ == "__main__":
    # Build the UI and start serving it only when run as a script.
    launch_options = {"share": True, "server_port": None}
    create_interface().launch(**launch_options)