spagestic committed on
Commit
c2408ff
·
verified ·
1 Parent(s): 9938557

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +163 -26
app.py CHANGED
@@ -1,35 +1,172 @@
1
  """
2
- PDF Text Extractor Application
3
- Main entry point for the PDF Text Extractor application.
4
  """
5
 
6
- import os
7
- from dotenv import load_dotenv
8
- from ui import create_interface
9
- from utils.config import check_api_key, get_app_config
 
 
 
 
10
 
11
def main():
    """Launch the PDF Text Extractor application.

    Loads environment variables from a ``.env`` file, runs the API key
    check, builds the interface and launches it using the ``debug``,
    ``quiet`` and ``max_file_size`` values from the application config.
    """
    # Load the .env file before the key check that follows
    # (presumably check_api_key reads the environment -- confirm in utils.config).
    load_dotenv()
    check_api_key()

    app = create_interface()
    settings = get_app_config()

    # NOTE: settings["server_port"] is not forwarded to launch(); that
    # argument was commented out in the original code.
    launch_options = {
        "debug": settings["debug"],
        "quiet": settings["quiet"],
        "max_file_size": settings["max_file_size"],
    }
    app.launch(**launch_options)


# Only start the app when this file is executed as a script.
if __name__ == "__main__":
    main()
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  """
2
+ Interface creation module for PDF Text Extractor.
3
+ Defines the Gradio interface components and layout.
4
  """
5
 
6
+ import gradio as gr
7
+ from pdf_text_extractor import PDFTextExtractor
8
+ from ui.handlers import copy_text, download_text, process_images_for_display
9
+ from ui.components import (
10
+ create_header, create_upload_section, create_action_button,
11
+ create_text_display, create_action_buttons, create_image_gallery, apply_custom_css
12
+ )
13
+ from gradio_pdf import PDF # <-- Added import
14
 
15
def create_dummy_interface() -> gr.Blocks:
    """
    Build the placeholder interface shown when no API key is configured.

    The layout contains an upload field, a disabled extract button, a
    read-only text box, a status box explaining the missing key, and
    disabled copy/download buttons.

    Returns:
        gr.Blocks: Gradio interface with disabled functionality
    """
    unavailable_text = "API key not configured. Text extraction is unavailable."
    missing_key_status = (
        "❌ MISTRAL_API_KEY environment variable is not set. "
        "Please set it and restart the application."
    )
    disabled_button_labels = ("📋 Copy to Clipboard", "📥 Download as Text File")

    with gr.Blocks(title="PDF Text Extractor") as interface:
        gr.Markdown("""
        # 🔍 PDF Text Extractor

        ⚠️ **API key not configured.** Please set MISTRAL_API_KEY environment variable and restart the application.
        """)

        # Upload field
        with gr.Row():
            gr.File(label="Upload PDF", file_types=[".pdf"])

        # Extract button, rendered but not clickable
        with gr.Row():
            gr.Button("Extract Text", variant="primary", interactive=False)

        # Read-only text area explaining that extraction is unavailable
        with gr.Row():
            gr.Textbox(
                label="Extracted Text",
                lines=10,
                value=unavailable_text,
                interactive=False,
            )

        # Status line telling the user how to fix the configuration
        with gr.Row():
            gr.Textbox(label="Status", lines=2, value=missing_key_status)

        # Copy / download buttons, both disabled
        with gr.Row():
            for button_label in disabled_button_labels:
                gr.Button(button_label, interactive=False)

    return interface
55
+
56
+
57
+
58
def create_main_interface(extractor: PDFTextExtractor) -> gr.Blocks:
    """
    Build the fully functional Gradio interface around an extractor.

    Layout, top to bottom: header, PDF upload, PDF preview, extract button,
    status box, then tabs holding the extracted text (with copy/download
    buttons) and the extracted images.

    Args:
        extractor: PDFTextExtractor instance used to process uploaded PDFs

    Returns:
        gr.Blocks: Gradio interface with full functionality
    """
    # Browser-side script attached to the copy button: writes the text to the
    # clipboard and shows a green notification that is removed after 2 seconds.
    clipboard_js = """
    function(text) {
        if (text) {
            navigator.clipboard.writeText(text);
            // Show a temporary notification
            var notification = document.createElement('div');
            notification.textContent = 'Text copied to clipboard!';
            notification.style.position = 'fixed';
            notification.style.bottom = '20px';
            notification.style.left = '50%';
            notification.style.transform = 'translateX(-50%)';
            notification.style.padding = '10px 20px';
            notification.style.background = '#4CAF50';
            notification.style.color = 'white';
            notification.style.borderRadius = '4px';
            notification.style.zIndex = '1000';
            document.body.appendChild(notification);
            setTimeout(function() {
                document.body.removeChild(notification);
            }, 2000);
        }
        return text;
    }
    """

    def process_pdf_wrapper(pdf_file):
        """Run the closed-over extractor on the upload and prepare gallery images."""
        extracted_text, status, images_data = extractor.extract_text_from_pdf(pdf_file)

        # The upload may expose a .name attribute, be a plain truthy value
        # (presumably a path string -- confirm against the upload component),
        # or be empty, in which case no path is passed on.
        if hasattr(pdf_file, "name"):
            pdf_path = pdf_file.name
        elif pdf_file:
            pdf_path = pdf_file
        else:
            pdf_path = None

        gallery_images = process_images_for_display(images_data, pdf_path)
        return extracted_text, status, gallery_images

    with gr.Blocks(title="🔍 PDF Text Extractor", theme=gr.themes.Soft()) as interface:
        create_header()

        # File upload
        with gr.Row():
            pdf_input = create_upload_section()

        # Preview of the uploaded PDF
        with gr.Row():
            pdf_viewer = PDF(label="PDF Preview", height=500)

        # Extract button
        with gr.Row():
            submit_btn = create_action_button()

        # Status display
        with gr.Row():
            status_output = gr.Textbox(
                label="Status",
                lines=2,
                placeholder="Upload a PDF to see status...",
            )

        # Result tabs: text first, then images
        with gr.Tabs():
            with gr.TabItem("Extracted Text"):
                text_output = gr.Textbox(
                    label="Extracted Text",
                    lines=15,
                    max_lines=30,
                    placeholder="Extracted text will appear here...",
                    show_copy_button=True,
                )
                with gr.Row():
                    copy_btn, download_btn = create_action_buttons()
            with gr.TabItem("Extracted Images"):
                image_gallery = create_image_gallery()
                gr.Markdown("Images extracted from the PDF will appear here.")

        # --- Event wiring ---

        # Mirror the uploaded file into the preview component
        pdf_input.change(
            fn=lambda f: f,
            inputs=pdf_input,
            outputs=pdf_viewer,
        )

        # Run the extraction and fill the text, status and gallery outputs
        submit_btn.click(
            fn=process_pdf_wrapper,
            inputs=[pdf_input],
            outputs=[text_output, status_output, image_gallery],
        )

        # Copy button: the clipboard work is done by the browser-side script
        copy_btn.click(
            fn=copy_text,
            inputs=text_output,
            outputs=None,
            js=clipboard_js,
        )

        # The download target is created here, after the tabs, so it is
        # rendered at this position in the layout.
        download_target = gr.File(label="Download", elem_id="download_output")
        download_btn.click(
            fn=download_text,
            inputs=text_output,
            outputs=download_target,
            show_progress=False,
        )

        apply_custom_css()

    return interface
158
 
159
def create_interface() -> gr.Blocks:
    """
    Create and configure the Gradio interface.

    Tries to construct a PDFTextExtractor. If construction raises
    ValueError (treated here as "API key is missing"), the placeholder
    interface from create_dummy_interface() is returned instead.

    Only the extractor construction is guarded. A ValueError raised while
    building the main interface is a different failure and is allowed to
    propagate instead of being misreported as a missing API key.

    Returns:
        gr.Blocks: Configured Gradio interface
    """
    try:
        extractor = PDFTextExtractor()
    except ValueError:
        # Fall back to the dummy interface if the API key is missing
        return create_dummy_interface()

    return create_main_interface(extractor)