spagestic committed on
Commit
f9a64f4
·
1 Parent(s): c2408ff

refactor: Simplify main application entry point and remove unused interface functions

Browse files
Files changed (1) hide show
  1. app.py +26 -163
app.py CHANGED
@@ -1,172 +1,35 @@
1
  """
2
- Interface creation module for PDF Text Extractor.
3
- Defines the Gradio interface components and layout.
4
  """
5
 
6
- import gradio as gr
7
- from pdf_text_extractor import PDFTextExtractor
8
- from ui.handlers import copy_text, download_text, process_images_for_display
9
- from ui.components import (
10
- create_header, create_upload_section, create_action_button,
11
- create_text_display, create_action_buttons, create_image_gallery, apply_custom_css
12
- )
13
- from gradio_pdf import PDF # <-- Added import
14
 
15
- def create_dummy_interface() -> gr.Blocks:
16
- """
17
- Create a simple interface for when the API key is not configured.
18
 
19
- Returns:
20
- gr.Blocks: Gradio interface with disabled functionality
21
- """
22
- with gr.Blocks(title="PDF Text Extractor") as interface:
23
- gr.Markdown("""
24
- # 🔍 PDF Text Extractor
25
-
26
- ⚠️ **API key not configured.** Please set MISTRAL_API_KEY environment variable and restart the application.
27
- """)
28
-
29
- with gr.Row():
30
- gr.File(label="Upload PDF", file_types=[".pdf"])
31
-
32
- with gr.Row():
33
- gr.Button("Extract Text", variant="primary", interactive=False)
34
-
35
- with gr.Row():
36
- gr.Textbox(
37
- label="Extracted Text",
38
- lines=10,
39
- value="API key not configured. Text extraction is unavailable.",
40
- interactive=False
41
- )
42
-
43
- with gr.Row():
44
- gr.Textbox(
45
- label="Status",
46
- lines=2,
47
- value="❌ MISTRAL_API_KEY environment variable is not set. Please set it and restart the application."
48
- )
49
-
50
- with gr.Row():
51
- gr.Button("📋 Copy to Clipboard", interactive=False)
52
- gr.Button("📥 Download as Text File", interactive=False)
53
 
54
- return interface
55
-
56
-
57
-
58
- def create_main_interface(extractor: PDFTextExtractor) -> gr.Blocks:
59
- """
60
- Create the main application interface.
61
 
62
- Args:
63
- extractor: PDFTextExtractor instance
64
-
65
- Returns:
66
- gr.Blocks: Gradio interface with full functionality """
67
- # Make the extractor a local function attribute
68
- def process_pdf_wrapper(pdf_file):
69
- """Process PDF with the extractor from closure"""
70
- extracted_text, status, images_data = extractor.extract_text_from_pdf(pdf_file)
71
- # Get PDF file path for image extraction
72
- pdf_path = pdf_file.name if hasattr(pdf_file, 'name') else pdf_file if pdf_file else None
73
- gallery_images = process_images_for_display(images_data, pdf_path)
74
- return extracted_text, status, gallery_images
75
 
76
- with gr.Blocks(title="🔍 PDF Text Extractor", theme=gr.themes.Soft()) as interface:
77
- create_header()
78
- # Add file upload section
79
- with gr.Row():
80
- pdf_input = create_upload_section()
81
- # Add PDF viewer for uploaded file
82
- with gr.Row():
83
- pdf_viewer = PDF(label="PDF Preview", height=500)
84
- # Add extract button
85
- with gr.Row():
86
- submit_btn = create_action_button()
87
- # Add status display
88
- with gr.Row():
89
- status_output = gr.Textbox(
90
- label="Status",
91
- lines=2,
92
- placeholder="Upload a PDF to see status..."
93
- )
94
- # Create tabs for text and images
95
- with gr.Tabs():
96
- with gr.TabItem("Extracted Text"):
97
- text_output = gr.Textbox(
98
- label="Extracted Text",
99
- lines=15,
100
- max_lines=30,
101
- placeholder="Extracted text will appear here...",
102
- show_copy_button=True
103
- )
104
- with gr.Row():
105
- copy_btn, download_btn = create_action_buttons()
106
- with gr.TabItem("Extracted Images"):
107
- image_gallery = create_image_gallery()
108
- image_info = gr.Markdown("Images extracted from the PDF will appear here.")
109
- # Set up function calls
110
- # Update PDF viewer when a file is uploaded
111
- pdf_input.change(
112
- fn=lambda f: f,
113
- inputs=pdf_input,
114
- outputs=pdf_viewer
115
- )
116
- submit_btn.click(
117
- fn=process_pdf_wrapper,
118
- inputs=[pdf_input],
119
- outputs=[text_output, status_output, image_gallery]
120
- )
121
- copy_btn.click(
122
- fn=copy_text,
123
- inputs=text_output,
124
- outputs=None,
125
- js="""
126
- function(text) {
127
- if (text) {
128
- navigator.clipboard.writeText(text);
129
- // Show a temporary notification
130
- var notification = document.createElement('div');
131
- notification.textContent = 'Text copied to clipboard!';
132
- notification.style.position = 'fixed';
133
- notification.style.bottom = '20px';
134
- notification.style.left = '50%';
135
- notification.style.transform = 'translateX(-50%)';
136
- notification.style.padding = '10px 20px';
137
- notification.style.background = '#4CAF50';
138
- notification.style.color = 'white';
139
- notification.style.borderRadius = '4px';
140
- notification.style.zIndex = '1000';
141
- document.body.appendChild(notification);
142
- setTimeout(function() {
143
- document.body.removeChild(notification);
144
- }, 2000);
145
- }
146
- return text;
147
- }
148
- """
149
- )
150
- download_btn.click(
151
- fn=download_text,
152
- inputs=text_output,
153
- outputs=gr.File(label="Download", elem_id="download_output"),
154
- show_progress=False
155
- )
156
- apply_custom_css()
157
- return interface
158
 
159
- def create_interface() -> gr.Blocks:
160
- """
161
- Create and configure the Gradio interface.
162
-
163
- Returns:
164
- gr.Blocks: Configured Gradio interface
165
- """
166
- # Initialize the PDF extractor
167
- try:
168
- extractor = PDFTextExtractor()
169
- return create_main_interface(extractor)
170
- except ValueError as e:
171
- # Create a dummy interface if API key is missing
172
- return create_dummy_interface()
 
1
  """
2
+ PDF Text Extractor Application
3
+ Main entry point for the PDF Text Extractor application.
4
  """
5
 
6
+ import os
7
+ from dotenv import load_dotenv
8
+ from ui import create_interface
9
+ from utils.config import check_api_key, get_app_config
 
 
 
 
10
 
11
+ def main():
12
+ """Main function to launch the application."""
 
13
 
14
+ # Load environment variables from .env file
15
+ load_dotenv()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
+ # Check for API key
18
+ check_api_key()
 
 
 
 
 
19
 
20
+ # Create and launch the interface
21
+ interface = create_interface()
 
 
 
 
 
 
 
 
 
 
 
22
 
23
+ # Get application configuration
24
+ app_config = get_app_config()
25
+
26
+ # Launch with appropriate settings
27
+ interface.launch(
28
+ # server_port=app_config["server_port"],
29
+ debug=app_config["debug"],
30
+ quiet=app_config["quiet"],
31
+ max_file_size=app_config["max_file_size"]
32
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
+ if __name__ == "__main__":
35
+ main()