"""Gradio front end that asks Gemini to locate figures, tables and equations in a PDF."""

import json
import os
import tempfile

import google.generativeai as genai
import gradio as gr

api_key = os.getenv("GEMINI_API_KEY")
genai.configure(api_key=api_key)

# Model used when the caller does not ask for a specific one.
_DEFAULT_MODEL_NAME = "gemini-1.5-pro-exp-0827"

# Instruction sentence that precedes the schema in the request sent to the model.
_INSTRUCTION = (
    "give the answer in the given JSON schema for every page having figure, "
    "table or equations accurately.\n"
)

# Schema the model is asked to follow: one entry per page, each with
# "Figure", "Equation" and "Table" lists.
_SCHEMA_PROMPT = '''
[
  {
    "pageNumber":{
      "type": "integer",
      "description":"Indicates if the page number of the pdf if present else -1"
    },
    "Figure": [
      {
        "figureNumber":{
          "type":"integer",
          "description":"Indicates the figure number if present else -1"
        },
        "details":{
          "isPresent": {
            "type": "bool",
            "description": "Indicates if there is a figure present in the image."
          },
          "inTextDescription": {
            "type": "bool",
            "description": "Indicates if there is a description of the figure present in the main text other than caption."
          },
          "captionLocation":{
            "type": "enum",
            "enum": ["Above", "Below", "None"],
            "description": "Indicates the location of the figure caption ('above', 'below', or 'none')."
          },
          "inCaptionDescription": {
            "type": "bool",
            "description": "Indicates if the caption description is about the figure."
          }
        },
        "coordinates":{
          "type": "Array",
          "description": "give the coordinates for the bounding box for this Figure in the order: x, y, height, width"
        }
      }
    ],
    "Equation": [
      {
        "equationNumber":{
          "type": "integer",
          "description":"Indicates if there is a number of the equation if present else -1"
        },
        "details":{
          "isPresent": {
            "type": "bool",
            "description": "Indicates if there is an equation present in the image."
          },
          "inTextDescription": {
            "type": "bool",
            "description": "Indicates if there is a description of the equation present in the main text other than caption."
          }
        },
        "coordinates":{
          "type": "Array",
          "description": "give the coordinates for the bounding box for this Equation in the order: x, y, height, width"
        }
      }
    ],
    "Table": [
      {
        "tableNumber":{
          "type":"integer",
          "description":"Indicates the table number if present else -1"
        },
        "details":{
          "isPresent": {
            "type": "bool",
            "description": "Indicates if there is a table present in the image."
          },
          "inTextDescription": {
            "type": "bool",
            "description": "Indicates if there is a description of the table present in the main text other than caption."
          },
          "captionLocation": {
            "type": "enum",
            "enum": ["Above", "Below", "None"],
            "description": "Indicates the location of the table caption ('above', 'below', or 'none')."
          },
          "inCaptionDescription": {
            "type": "bool",
            "description": "Indicates if the caption description is about the table."
          }
        },
        "coordinates":{
          "type": "Array",
          "description": "give the coordinates for the bounding box for this Table in the order: x, y, height, width"
        }
      }
    ]
  }
]
'''


def _strip_code_fence(text):
    """Return *text* without a surrounding Markdown code fence, if it has one.

    Handles "```json\\n...\\n```", a bare "```...```" fence, trailing
    whitespace after the closing fence, and replies with no fence at all.
    """
    body = text.strip()
    if body.startswith("```"):
        body = body[3:]
        if body.endswith("```"):
            body = body[:-3]
        # An optional language tag may follow the opening fence.
        if body[:4].lower() == "json":
            body = body[4:]
    return body.strip()


def figureTableEqAnalyser(filePath, model_name=_DEFAULT_MODEL_NAME):
    """Upload the file at *filePath* to Gemini and return the parsed JSON reply.

    Args:
        filePath: Path of the file to upload; it must be fully written to
            disk before this function is called.
        model_name: Name of the generative model to query.

    Returns:
        The object produced by ``json.loads`` on the model's reply (the
        prompt asks for a list with one entry per page).

    Raises:
        json.JSONDecodeError: If the reply is not valid JSON once any
            surrounding code fence has been removed.
    """
    uploaded = genai.upload_file(filePath)
    model = genai.GenerativeModel(model_name=model_name)
    response = model.generate_content([_INSTRUCTION + _SCHEMA_PROMPT, uploaded])
    return json.loads(_strip_code_fence(response.text))


# ------------------------------
# Gradio Interface
# ------------------------------


def process_upload(file) -> str:
    """Analyse an uploaded PDF and return the result as a JSON string.

    Args:
        file: Raw bytes of the uploaded PDF (the file input is configured
            with ``type="binary"``), or ``None`` when nothing was uploaded.

    Returns:
        Pretty-printed JSON text: the analysis result, or an ``{"error": ...}``
        object when no file was supplied.
    """
    if file is None:
        # The interface wires exactly one output, so return one value here too.
        return json.dumps({"error": "No file uploaded"}, indent=2)

    print("Here in Process upload")
    print("test 1")

    # delete=False plus leaving the ``with`` block guarantees the bytes are
    # flushed and the handle is closed before the path is handed to the
    # uploader; the file is then removed explicitly below.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_input:
        temp_input.write(file)
        temp_input_path = temp_input.name
    print(temp_input_path)

    try:
        results = figureTableEqAnalyser(temp_input_path)
    finally:
        # Remove the temporary copy even when the analysis fails.
        os.unlink(temp_input_path)

    print(results)
    return json.dumps(results, indent=2)


def create_interface():
    """Build and return the Gradio Blocks UI: a PDF upload, a button and a JSON view."""
    with gr.Blocks(title="PDF Analyzer") as interface:
        gr.Markdown("# PDF Analyzer")
        gr.Markdown(
            "Upload a PDF document to analyze its structure, references, "
            "language, and more."
        )

        with gr.Row():
            file_input = gr.File(
                label="Upload PDF",
                file_types=[".pdf"],
                type="binary",
            )

        with gr.Row():
            analyze_btn = gr.Button("Analyze PDF")

        with gr.Row():
            results_output = gr.JSON(
                label="Analysis Results",
                show_label=True,
            )

        analyze_btn.click(
            fn=process_upload,
            inputs=[file_input],
            outputs=[results_output],
        )

    return interface


if __name__ == "__main__":
    interface = create_interface()
    interface.launch(
        share=True,
        server_port=None,
    )