fmegahed committed on
Commit
5573196
·
verified ·
1 Parent(s): 45db913

Updating app.R to version 2.0

Browse files
Files changed (1) hide show
  1. app.R +1165 -143
app.R CHANGED
@@ -1,244 +1,1266 @@
1
  library(shiny)
2
  library(ellmer)
3
  library(purrr)
 
 
 
 
 
 
4
 
5
  num_example_fields = 2
6
 
 
 
7
  # Define UI for the app
8
  ui = shiny::fluidPage(
9
- shiny::fluidRow(
10
- shiny::column(12,
11
- shiny::div(
12
- style = "background-color: #f8f9fa; padding: 10px; margin-bottom: 15px; border-radius: 5px;",
13
- shiny::div(
14
- style = "display: flex; justify-content: space-between; align-items: center;",
15
- shiny::div(
16
- shiny::strong("NHTSA Recall Information Extraction Tool"),
17
- shiny::p("Version 1.0 - April 2025")
18
- ),
19
- shiny::div(
20
- shiny::p("Authors: Fadel M. Megahed, Ying-Ju (Tessa) Chen"),
21
- shiny::p("Contact: fmegahed@miamioh.edu")
22
- )
23
- )
24
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  )
26
  ),
27
-
28
- shiny::titlePanel("NHTSA Recall Information Extraction"),
29
-
30
- # Add introduction panel
31
- shiny::fluidRow(
32
- shiny::column(12,
33
- shiny::wellPanel(
34
- shiny::h4("How to Use This App"),
35
- shiny::p("This app extracts structured data from NHTSA recall notices using AI. Follow these steps:"),
36
- shiny::tags$ol(
37
- shiny::tags$li("Paste recall text containing information you want to extract"),
38
- shiny::tags$li("Specify the number of fields to extract"),
39
- shiny::tags$li("Define each field with a label and description"),
40
- shiny::tags$li("Click 'Extract Data' to process")
41
- ),
42
- shiny::p("Example: For extracting recall information, create fields like 'manufacturer', 'models', and 'defect_summary' with clear descriptions."),
43
- shiny::p("You can process multiple recalls at once: separate each recall text with a double line break (press Enter twice).")
44
- )
 
 
 
 
 
 
 
 
 
45
  )
46
  ),
47
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  shiny::sidebarLayout(
49
  shiny::sidebarPanel(
50
- shiny::textAreaInput(
51
- "input_text",
52
- "Enter recall text to extract from:",
53
- rows = 10,
54
- placeholder = "Paste your recall text here...\n\nSeparate multiple recalls with double line breaks (press Enter twice).\n\nExample: 'Ford Motor Company is recalling certain 2021-2022 vehicles due to faulty brakes.'"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  ),
56
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  shiny::numericInput(
58
- "num_fields",
59
- "Number of fields to extract:",
60
- value = num_example_fields,
61
- min = 1,
 
 
 
62
  max = 10
63
  ),
64
-
65
- # Add help text
66
- shiny::helpText("Define each field with a clear label (e.g., 'manufacturer') and description (e.g., 'The name of the company recalling the vehicles')."),
67
-
 
 
 
 
 
 
 
 
 
 
68
  shiny::uiOutput("fields_ui"),
69
-
70
- # Example button
71
- shiny::actionButton("load_example", "Load Examples", class = "btn-info"),
72
- shiny::actionButton("extract_btn", "Extract Data", class = "btn-primary")
 
 
 
 
 
73
  ),
74
-
75
  shiny::mainPanel(
76
- shiny::h3("Extracted Recall Data"),
77
- shiny::p("Results will appear here after extraction"),
78
- shiny::tableOutput("extracted_table"),
79
-
80
- # Add tips section
81
- shiny::wellPanel(
82
- shiny::h4("Tips for Better Results"),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
  shiny::tags$ul(
84
- shiny::tags$li("Use specific field descriptions to guide the AI"),
85
- shiny::tags$li("Start with more fields and remove unnecessary ones later"),
86
- shiny::tags$li("If results are inaccurate, try rephrasing your field descriptions"),
87
- shiny::tags$li("To process multiple recalls, separate each with a double line break"),
88
- shiny::tags$li("Each recall text should contain complete information for all fields")
89
  )
90
  ),
91
-
92
- # Add API key notice
93
- shiny::wellPanel(
94
- shiny::h4("Note:"),
95
- shiny::p("To ensure the timeliness of results (since this is hosted on a CPU), we utilize `gpt-4o-mini` for this demo.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
  )
97
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
  )
99
  )
100
 
101
  # Define server logic required to generate dynamic UI and extract data
102
  server = function(input, output, session) {
103
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
  # Load example data
105
  shiny::observeEvent(input$load_example, {
106
  example_text = "Ford Motor Company (Ford) is recalling certain 2021-2022 Bronco vehicles equipped with rearview camera systems and 8-inch screen displays. The rearview camera image may still be displayed after a backing event has ended. As such, these vehicles fail to comply with the requirements of Federal Motor Vehicle Safety Standard number 111, \"Rear Visibility.\"\n\nHonda (American Honda Motor Co.) is recalling certain 2022-2025 Acura MDX Type-S, 2023-2025 Honda Pilot, and 2021-2025 Acura TLX Type-S vehicles. A software error in the fuel injection electronic control unit (FI-ECU) may cause an engine stall or a loss of power."
 
 
 
 
 
107
  shiny::updateTextAreaInput(session, "input_text", value = example_text)
108
-
109
  # Set up example fields
110
  shiny::updateNumericInput(session, "num_fields", value = num_example_fields)
 
 
 
 
 
 
111
  })
112
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
  # Dynamically generate UI elements for each field's label and description
114
  output$fields_ui = shiny::renderUI({
115
  n = input$num_fields
 
116
  if (is.null(n) || n < 1) return(NULL)
117
-
118
- # Example field definitions for NHTSA recalls
119
- example_labels = c("manufacturer", "defect_summary", "models", "model_years", "component", "fmvss_number", "root_cause", "risk")
120
- example_descs = c(
121
- "The name of the company recalling the vehicles.",
122
- "Summary of the main defect.",
123
- "List of affected vehicle models.",
124
- "List of model years affected.",
125
- "The part or system affected by the defect.",
126
- "The FMVSS number mentioned, if any.",
127
- "The root cause of the defect.",
128
- "The risk or consequence posed by the defect."
129
- )
130
-
 
 
 
 
 
 
 
131
  fields = purrr::map(1:n, function(i) {
132
- # Set default values based on examples if available
133
- default_label = if(i <= length(example_labels)) example_labels[i] else paste0("field", i)
134
- default_desc = if(i <= length(example_descs)) example_descs[i] else paste0("Description for field ", i)
135
-
136
- shiny::tagList(
 
 
 
 
 
 
 
 
 
 
137
  shiny::textInput(
138
- paste0("field_label_", i),
139
- paste("Field", i, "Label:"),
140
  value = default_label
141
  ),
142
  shiny::textInput(
143
- paste0("field_desc_", i),
144
- paste("Field", i, "Description:"),
145
  value = default_desc
146
- ),
147
- shiny::hr()
148
  )
149
  })
150
  do.call(shiny::tagList, fields)
151
  })
152
-
153
  # Build a custom type_object based on user-specified fields
154
  create_type_object = shiny::reactive({
155
  n = input$num_fields
156
  if (is.null(n) || n < 1) return(NULL)
157
-
158
  # Build a list of field definitions
159
  type_list = list()
160
- for(i in 1:n){
161
  label = input[[paste0("field_label_", i)]]
162
- desc = input[[paste0("field_desc_", i)]]
163
- if (!is.null(label) && label != ""){
164
  type_list[[label]] = ellmer::type_string(desc, required = FALSE)
165
  }
166
  }
167
  # Dynamically create the type object
168
  do.call(ellmer::type_object, type_list)
169
  })
170
-
171
  # When the extract button is clicked, perform extraction
172
  shiny::observeEvent(input$extract_btn, {
173
- shiny::req(input$input_text)
174
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
175
  # Show processing indicator
176
- shiny::showNotification("Processing extraction request...", type = "message", duration = NULL, id = "extract_notif")
177
-
 
 
 
 
 
 
 
 
178
  custom_type_object = create_type_object()
179
-
180
  # Initialize the chat object using the OpenAI API key from your environment
181
  tryCatch({
182
  # Check if API key is available
183
  if (Sys.getenv("OPENAI_API_KEY") == "") {
184
  stop("OpenAI API key not found. Please set the OPENAI_API_KEY environment variable.")
185
  }
186
-
187
  chat = ellmer::chat_openai(
188
- model = 'gpt-4o-mini',
189
- api_key = Sys.getenv("OPENAI_API_KEY")
190
- )
191
-
192
- # Extraction function
193
- extract_fn = function(x, chat_object, custom_type_object) {
194
- return(chat_object$extract_data(x, type = custom_type_object))
195
- }
196
-
197
- # Split text by double linebreaks to process multiple entities
198
- text_blocks = unlist(strsplit(input$input_text, "\n\n"))
199
- text_blocks = text_blocks[text_blocks != ""] # Remove empty blocks
200
-
201
- # Process each text block
202
  all_results = list()
203
-
204
- for (i in seq_along(text_blocks)) {
205
- result = extract_fn(text_blocks[i], chat, custom_type_object)
206
- if (is.list(result)) {
207
- # Add a block_id column to identify the source text block
208
- result$block_id = i
209
- all_results[[i]] = result
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
210
  }
211
  }
212
-
213
  # Combine all results into a single data frame
214
  if (length(all_results) > 0) {
215
  combined_results = do.call(rbind, lapply(all_results, function(x) {
216
  # Ensure all results have the same columns by converting to data frame
217
  as.data.frame(x)
218
  }))
219
-
 
 
 
220
  # Render the output as a table
221
- output$extracted_table = shiny::renderTable({
222
- combined_results
223
- }, rownames = TRUE)
 
 
 
 
 
 
224
  } else {
225
  # Handle the case when no valid results are returned
 
226
  output$extracted_table = shiny::renderTable({
227
- data.frame(message = "No valid data could be extracted")
228
  })
229
  }
230
-
231
  # Remove notification
232
  shiny::removeNotification(id = "extract_notif")
233
- shiny::showNotification("Extraction complete!", type = "message", duration = 3)
234
-
 
 
 
 
 
 
235
  }, error = function(e) {
236
  # Handle errors
237
  shiny::removeNotification(id = "extract_notif")
238
- shiny::showNotification(paste("Error:", e$message), type = "error", duration = NULL)
 
 
 
 
 
 
 
 
239
  })
240
  })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
241
  }
242
 
243
  # Run the Shiny app
244
- shiny::shinyApp(ui = ui, server = server)
 
1
  library(shiny)
2
  library(ellmer)
3
  library(purrr)
4
+ library(pdftools)
5
+ library(magick)
6
+ library(base64enc)
7
+
8
+ # Maximum pages for image-based extraction (free app limit)
9
+ MAX_IMAGE_PAGES = 5
10
 
11
  num_example_fields = 2
12
 
13
+
14
+
15
  # Define UI for the app
16
  ui = shiny::fluidPage(
17
+ # Add Miami theme CSS and favicon
18
+ shiny::tags$head(
19
+ shiny::tags$link(rel = "icon", type = "image/svg+xml", href = "favicon.svg"),
20
+ shiny::tags$link(rel = "stylesheet", type = "text/css", href = "miami-theme.css"),
21
+ shiny::tags$title("AI-Powered Text Extraction Tool")
22
+ ),
23
+
24
+ # Custom header with Miami branding
25
+ shiny::div(
26
+ class = "app-header",
27
+ shiny::div(
28
+ class = "header-content",
29
+ shiny::div(
30
+ class = "header-left",
31
+ shiny::tags$h1("AI-Powered Text Extraction Tool"),
32
+ shiny::p(class = "subtitle", "Extract Structured Data from Text, Documents, and Images")
33
+ ),
34
+ shiny::div(
35
+ class = "header-right",
36
+ shiny::p("Version 2.0 | January 2026"),
37
+ shiny::p(
38
+ shiny::tags$strong("Authors: "),
39
+ "Fadel M. Megahed, Ying-Ju (Tessa) Chen, Allison Jones-Farmer, Ibrahim Yousif, and Inez M. Zwetsloot"
40
+ ),
41
+ shiny::p(
42
+ shiny::tags$strong("Contact: "),
43
+ shiny::tags$a(
44
+ href = "mailto:fmegahed@miamioh.edu",
45
+ style = "color: #EFDB72;",
46
+ "fmegahed@miamioh.edu"
47
+ )
48
+ )
49
+ )
50
+ )
51
+ ),
52
+
53
+ # University logos section
54
+ shiny::div(
55
+ class = "logo-container",
56
+ shiny::tags$img(
57
+ src = "miami-logo.png",
58
+ alt = "Miami University Logo",
59
+ style = "height: 55px;"
60
+ ),
61
+ shiny::div(class = "logo-divider"),
62
+ shiny::tags$img(
63
+ src = "university-of-dayton-vector-logo.png",
64
+ alt = "University of Dayton Logo",
65
+ style = "height: 50px;"
66
+ ),
67
+ shiny::div(class = "logo-divider"),
68
+ shiny::tags$img(
69
+ src = "uva-compacte-logo.png",
70
+ alt = "University of Amsterdam Logo",
71
+ style = "height: 50px;"
72
+ ),
73
+ shiny::div(
74
+ style = "margin-left: auto; font-size: 0.85em; color: #666;",
75
+ shiny::p(
76
+ style = "margin: 0;",
77
+ "A collaboration between",
78
+ shiny::tags$strong("Miami University,"),
79
+ "the",
80
+ shiny::tags$strong("University of Dayton,"),
81
+ "and the",
82
+ shiny::tags$strong("University of Amsterdam")
83
+ )
84
+ )
85
+ ),
86
+
87
+ # How to use section
88
+ shiny::div(
89
+ class = "how-to-use",
90
+ shiny::div(
91
+ style = "display: flex; justify-content: space-between; align-items: center; margin-bottom: 10px;",
92
+ shiny::tags$h4(
93
+ style = "margin: 0;",
94
+ shiny::icon("circle-info"),
95
+ " How to Use This App"
96
+ ),
97
+ shiny::tags$button(
98
+ id = "open-video-modal",
99
+ class = "btn btn-video-tutorial",
100
+ shiny::icon("play-circle"),
101
+ " Watch Video Tutorial"
102
+ )
103
+ ),
104
+ shiny::tags$h5(
105
+ style = "color: #C41230; margin-top: 0;",
106
+ shiny::icon("bolt"),
107
+ " Quick Demo"
108
+ ),
109
+ shiny::p(
110
+ "Click the ",
111
+ shiny::tags$strong("'Load Examples'"),
112
+ " button to load two sample NHTSA recall notices with pre-configured extraction fields. ",
113
+ "Then click ",
114
+ shiny::tags$strong("'Extract Data'"),
115
+ " to see the AI extract structured information from the text. ",
116
+ "You can also add more fields, change field labels, and modify descriptions. ",
117
+ shiny::tags$em(
118
+ "(Note: Field labels only affect how data is stored and displayed, not extraction performance.)"
119
+ )
120
+ ),
121
+ shiny::tags$h5(
122
+ style = "color: #C41230; margin-top: 15px;",
123
+ shiny::icon("file-import"),
124
+ " Input Methods"
125
+ ),
126
+ shiny::p(
127
+ "Choose from six input methods to provide your source content:"
128
+ ),
129
+ shiny::tags$ul(
130
+ style = "margin-bottom: 10px;",
131
+ shiny::tags$li(
132
+ shiny::tags$strong("Demo Data:"),
133
+ " Try the app with pre-loaded NHTSA vehicle recall examples."
134
+ ),
135
+ shiny::tags$li(
136
+ shiny::tags$strong("Paste Text:"),
137
+ " Directly paste text content. Separate multiple items with double line breaks."
138
+ ),
139
+ shiny::tags$li(
140
+ shiny::tags$strong("Text File:"),
141
+ " Upload .txt, .csv (single column), or .md files."
142
+ ),
143
+ shiny::tags$li(
144
+ shiny::tags$strong("Readable PDF:"),
145
+ " Upload a machine-readable PDF. Text is extracted automatically."
146
+ ),
147
+ shiny::tags$li(
148
+ shiny::tags$strong("Scanned PDF:"),
149
+ " Upload scanned/image-based PDFs. Pages are converted to images for vision processing."
150
+ ),
151
+ shiny::tags$li(
152
+ shiny::tags$strong("Upload Images:"),
153
+ " Upload images (PNG, JPEG, WebP, GIF) for AI vision-based extraction."
154
+ )
155
+ ),
156
# Usage-limit note. The page/item cap is read from MAX_IMAGE_PAGES so this
# sentence stays in sync with the per-panel "Free app limit" notes, which are
# built from the same constant.
shiny::p(
  style = "font-size: 0.9em; color: #666;",
  shiny::icon("info-circle"),
  " ",
  shiny::tags$em(
    paste0(
      "Note: PDF, Image, and Scanned PDF modes are limited to ",
      MAX_IMAGE_PAGES,
      " pages/items in this free demo."
    )
  )
),
164
+ shiny::tags$h5(
165
+ style = "color: #C41230; margin-top: 15px;",
166
+ shiny::icon("edit"),
167
+ " Custom Fields"
168
+ ),
169
+ shiny::p(
170
+ "Customize your extraction by adjusting:"
171
+ ),
172
+ shiny::tags$ul(
173
+ style = "margin-bottom: 10px;",
174
+ shiny::tags$li("The ", shiny::tags$strong("number of fields"), " to extract"),
175
+ shiny::tags$li("Your own ", shiny::tags$strong("field labels"), " (for storage/display only)"),
176
+ shiny::tags$li("Detailed ", shiny::tags$strong("field descriptions"), " to guide the AI")
177
+ ),
178
+ shiny::p(
179
+ shiny::icon("lightbulb"),
180
+ " ",
181
+ shiny::tags$em(
182
+ "The more specific your field descriptions, the better the extraction results."
183
+ )
184
  )
185
  ),
186
+
187
+ # Video Modal
188
+ shiny::div(
189
+ id = "video-modal",
190
+ class = "video-modal",
191
+ shiny::div(
192
+ class = "video-modal-content",
193
+ shiny::tags$span(
194
+ id = "close-video-modal",
195
+ class = "video-modal-close",
196
+ shiny::HTML("&times;")
197
+ ),
198
+ shiny::tags$h4(
199
+ style = "color: #C41230; margin-top: 0; margin-bottom: 15px;",
200
+ shiny::icon("play-circle"),
201
+ " Video Tutorial"
202
+ ),
203
+ shiny::div(
204
+ class = "video-modal-wrapper",
205
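+ # The iframe starts with an empty src; the modal script sets it from `videoUrl` when the modal opens. Replace VIDEO_ID_PLACEHOLDER in that script with the actual YouTube video ID.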
206
+ shiny::tags$iframe(
207
+ id = "tutorial-video",
208
+ src = "",
209
+ allow = "accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture",
210
+ allowfullscreen = NA
211
+ )
212
+ )
213
  )
214
  ),
215
+
216
+ # JavaScript for video modal
217
# Inline client-side script: video-modal open/close handlers plus the
# input-method tab switcher that reports the active tab as input$input_method.
shiny::tags$script(shiny::HTML("
  var videoUrl = 'https://www.youtube.com/embed/VIDEO_ID_PLACEHOLDER';
  var modal = document.getElementById('video-modal');
  var openBtn = document.getElementById('open-video-modal');
  var closeBtn = document.getElementById('close-video-modal');
  var videoIframe = document.getElementById('tutorial-video');

  // The iframe src is assigned on open and cleared again on every close path.
  openBtn.onclick = function() {
    modal.style.display = 'flex';
    videoIframe.src = videoUrl;
  };

  closeBtn.onclick = function() {
    modal.style.display = 'none';
    videoIframe.src = '';
  };

  // Clicking the backdrop (the modal container itself) closes the modal.
  window.onclick = function(event) {
    if (event.target == modal) {
      modal.style.display = 'none';
      videoIframe.src = '';
    }
  };

  document.addEventListener('keydown', function(event) {
    if (event.key === 'Escape' && modal.style.display === 'flex') {
      modal.style.display = 'none';
      videoIframe.src = '';
    }
  });

  // Input tab switching
  function switchInputTab(tab) {
    var tabs = ['demo', 'paste', 'textfile', 'pdf', 'scanned', 'image'];
    tabs.forEach(function(t) {
      var tabEl = document.getElementById('tab-' + t);
      var panelEl = document.getElementById('panel-' + t);
      if (t === tab) {
        tabEl.classList.add('active');
        panelEl.style.display = 'block';
      } else {
        tabEl.classList.remove('active');
        panelEl.style.display = 'none';
      }
    });
    // Update Shiny with the current input method
    Shiny.setInputValue('input_method', tab);
  }

  // Initialize input method. This inline script is evaluated while the page
  // is still being parsed, when Shiny.setInputValue may not be defined yet,
  // so the initial value is sent once the Shiny session has connected.
  $(document).on('shiny:connected', function() {
    Shiny.setInputValue('input_method', 'demo');
  });
")),
268
+
269
  shiny::sidebarLayout(
270
  shiny::sidebarPanel(
271
+ width = 4,
272
+ class = "sidebar-panel",
273
+
274
+ shiny::tags$h4(
275
+ style = "color: #C41230; margin-top: 0; margin-bottom: 15px;",
276
+ shiny::icon("file-alt"),
277
+ " Input Configuration"
278
+ ),
279
+
280
+ # Input method tabs (3 rows for 6 tabs)
281
+ shiny::div(
282
+ class = "input-tabs-container",
283
+ shiny::div(
284
+ class = "input-tabs",
285
+ shiny::tags$button(
286
+ id = "tab-demo",
287
+ class = "input-tab active",
288
+ onclick = "switchInputTab('demo')",
289
+ shiny::icon("flask"),
290
+ " Demo Data"
291
+ ),
292
+ shiny::tags$button(
293
+ id = "tab-paste",
294
+ class = "input-tab",
295
+ onclick = "switchInputTab('paste')",
296
+ shiny::icon("paste"),
297
+ " Paste Text"
298
+ )
299
+ ),
300
+ shiny::div(
301
+ class = "input-tabs",
302
+ shiny::tags$button(
303
+ id = "tab-textfile",
304
+ class = "input-tab",
305
+ onclick = "switchInputTab('textfile')",
306
+ shiny::icon("file-lines"),
307
+ " Text File"
308
+ ),
309
+ shiny::tags$button(
310
+ id = "tab-pdf",
311
+ class = "input-tab",
312
+ onclick = "switchInputTab('pdf')",
313
+ shiny::icon("file-pdf"),
314
+ " Readable PDF"
315
+ )
316
+ ),
317
+ shiny::div(
318
+ class = "input-tabs",
319
+ shiny::tags$button(
320
+ id = "tab-scanned",
321
+ class = "input-tab",
322
+ onclick = "switchInputTab('scanned')",
323
+ shiny::icon("file-image"),
324
+ " Scanned PDF"
325
+ ),
326
+ shiny::tags$button(
327
+ id = "tab-image",
328
+ class = "input-tab",
329
+ onclick = "switchInputTab('image')",
330
+ shiny::icon("images"),
331
+ " Upload Images"
332
+ )
333
+ )
334
  ),
335
+
336
+ # Demo data panel
337
+ shiny::div(
338
+ id = "panel-demo",
339
+ class = "input-panel",
340
+ shiny::p(
341
+ "Try the app with pre-loaded NHTSA vehicle recall data. Click the button below to load sample text and pre-configured extraction fields."
342
+ ),
343
+ shiny::actionButton(
344
+ "load_example",
345
+ shiny::tags$span(shiny::icon("play"), " Load Demo Data"),
346
+ class = "btn-primary",
347
+ style = "margin-top: 10px;"
348
+ ),
349
+ shiny::uiOutput("demo_preview_ui")
350
+ ),
351
+
352
+ # Paste text panel
353
+ shiny::div(
354
+ id = "panel-paste",
355
+ class = "input-panel",
356
+ style = "display: none;",
357
+ shiny::textAreaInput(
358
+ "input_text",
359
+ NULL,
360
+ rows = 8,
361
+ placeholder = "Paste your text content here.\n\nTo process multiple items at once, separate each block of text with a double line break (press Enter twice).\n\nIMPORTANT: Before extracting, scroll down to configure your extraction fields:\n1. Set the number of fields you need\n2. Update each field label (e.g., 'product_name', 'price')\n3. Write clear descriptions for each field\n\nThen click 'Extract Data' to see results."
362
+ )
363
+ ),
364
+
365
+ # Text file upload panel
366
+ shiny::div(
367
+ id = "panel-textfile",
368
+ class = "input-panel",
369
+ style = "display: none;",
370
+ shiny::fileInput(
371
+ "text_file",
372
+ NULL,
373
+ accept = c(".txt", ".csv", ".md"),
374
+ placeholder = "No file selected",
375
+ buttonLabel = shiny::tags$span(shiny::icon("upload"), " Browse...")
376
+ ),
377
+ shiny::helpText(
378
+ shiny::icon("info-circle"),
379
+ " Upload a text file (.txt, .csv, .md). Content will be extracted automatically."
380
+ ),
381
+ shiny::div(
382
+ class = "file-format-note",
383
+ shiny::tags$strong("File format guidelines:"),
384
+ shiny::tags$ul(
385
+ style = "margin: 5px 0 0 0; padding-left: 20px; font-size: 0.85em;",
386
+ shiny::tags$li(".txt: Plain text, separate items with double line breaks"),
387
+ shiny::tags$li(".csv: Single column of text entries (one per row)"),
388
+ shiny::tags$li(".md: Markdown file treated as plain text")
389
+ )
390
+ ),
391
+ shiny::uiOutput("textfile_preview_ui")
392
+ ),
393
+
394
+ # PDF upload panel (machine-readable)
395
+ shiny::div(
396
+ id = "panel-pdf",
397
+ class = "input-panel",
398
+ style = "display: none;",
399
+ shiny::fileInput(
400
+ "pdf_file",
401
+ NULL,
402
+ accept = ".pdf",
403
+ placeholder = "No file selected",
404
+ buttonLabel = shiny::tags$span(shiny::icon("upload"), " Browse...")
405
+ ),
406
+ shiny::helpText(
407
+ shiny::icon("info-circle"),
408
+ " Upload a machine-readable PDF. Text will be extracted automatically."
409
+ ),
410
+ shiny::div(
411
+ class = "image-limit-note",
412
+ shiny::icon("exclamation-circle"),
413
+ shiny::tags$em(paste(" Free app limit: Only the first", MAX_IMAGE_PAGES, "pages will be processed."))
414
+ ),
415
+ shiny::uiOutput("pdf_preview_ui")
416
+ ),
417
+
418
+ # Image upload panel
419
+ shiny::div(
420
+ id = "panel-image",
421
+ class = "input-panel",
422
+ style = "display: none;",
423
+ shiny::fileInput(
424
+ "image_file",
425
+ NULL,
426
+ accept = c(".png", ".jpg", ".jpeg", ".webp", ".gif"),
427
+ multiple = TRUE,
428
+ placeholder = "No file selected",
429
+ buttonLabel = shiny::tags$span(shiny::icon("upload"), " Browse...")
430
+ ),
431
+ shiny::helpText(
432
+ shiny::icon("info-circle"),
433
+ " Upload image(s) containing text to extract. Supports PNG, JPEG, WebP, and GIF."
434
+ ),
435
+ shiny::div(
436
+ class = "image-limit-note",
437
+ shiny::icon("exclamation-circle"),
438
+ shiny::tags$em(paste(" Free app limit: Maximum", MAX_IMAGE_PAGES, "images will be processed."))
439
+ ),
440
+ shiny::uiOutput("image_preview_ui")
441
+ ),
442
+
443
+ # Scanned PDF panel
444
+ shiny::div(
445
+ id = "panel-scanned",
446
+ class = "input-panel",
447
+ style = "display: none;",
448
+ shiny::fileInput(
449
+ "scanned_pdf_file",
450
+ NULL,
451
+ accept = ".pdf",
452
+ placeholder = "No file selected",
453
+ buttonLabel = shiny::tags$span(shiny::icon("upload"), " Browse...")
454
+ ),
455
+ shiny::helpText(
456
+ shiny::icon("info-circle"),
457
+ " Upload a scanned/image-based PDF. Pages will be converted to images for AI vision processing."
458
+ ),
459
+ shiny::div(
460
+ class = "image-limit-note",
461
+ shiny::icon("exclamation-circle"),
462
+ shiny::tags$em(paste(" Free app limit: Only the first", MAX_IMAGE_PAGES, "pages will be processed."))
463
+ ),
464
+ shiny::uiOutput("scanned_pdf_preview_ui")
465
+ ),
466
+
467
+ shiny::hr(class = "field-separator"),
468
+
469
  shiny::numericInput(
470
+ "num_fields",
471
+ shiny::tags$span(
472
+ shiny::icon("list-ol"),
473
+ " Number of Fields to Extract:"
474
+ ),
475
+ value = num_example_fields,
476
+ min = 1,
477
  max = 10
478
  ),
479
+
480
+ shiny::helpText(
481
+ shiny::icon("info-circle"),
482
+ " Define each field with a clear label (e.g., 'manufacturer') and description (e.g., 'The name of the company recalling the vehicles')."
483
+ ),
484
+
485
+ shiny::hr(class = "field-separator"),
486
+
487
+ shiny::tags$h5(
488
+ style = "color: #C41230; margin-bottom: 10px;",
489
+ shiny::icon("tags"),
490
+ " Field Definitions"
491
+ ),
492
+
493
  shiny::uiOutput("fields_ui"),
494
+
495
+ shiny::div(
496
+ style = "margin-top: 20px;",
497
+ shiny::actionButton(
498
+ "extract_btn",
499
+ shiny::tags$span(shiny::icon("magic"), " Extract Data"),
500
+ class = "btn-primary"
501
+ )
502
+ )
503
  ),
504
+
505
  shiny::mainPanel(
506
+ width = 8,
507
+ class = "main-panel",
508
+
509
+ shiny::div(
510
+ class = "info-card",
511
+ shiny::tags$h3(
512
+ class = "section-heading",
513
+ shiny::icon("table"),
514
+ " Structured Data Extracted Using AI"
515
+ ),
516
+ shiny::div(
517
+ class = "results-placeholder",
518
+ id = "results-placeholder",
519
+ shiny::icon("arrow-left"),
520
+ " Configure your extraction fields and click 'Extract Data' to see results here"
521
+ ),
522
+ shiny::tableOutput("extracted_table"),
523
+ shiny::uiOutput("download_btn_ui")
524
+ ),
525
+
526
+ shiny::div(
527
+ class = "tips-section",
528
+ shiny::tags$h4(shiny::icon("lightbulb"), " Tips for Better Results"),
529
  shiny::tags$ul(
530
+ shiny::tags$li("Use ", shiny::tags$strong("specific field descriptions"), " to guide the AI accurately"),
531
+ shiny::tags$li("Start with ", shiny::tags$strong("more fields"), " and remove unnecessary ones later"),
532
+ shiny::tags$li("If results are inaccurate, try ", shiny::tags$strong("rephrasing"), " your field descriptions"),
533
+ shiny::tags$li("For ", shiny::tags$strong("multiple text blocks"), ", separate each with a double line break"),
534
+ shiny::tags$li("Each text block should contain ", shiny::tags$strong("complete information"), " for all fields you want to extract")
535
  )
536
  ),
537
+
538
+ shiny::div(
539
+ class = "note-section",
540
+ shiny::tags$h4(shiny::icon("cog"), " Technical Note"),
541
+ shiny::p(
542
+ "To ensure timely results (since this is hosted on a CPU), we utilize ",
543
+ shiny::tags$code("gpt-5-mini-2025-08-07"),
544
+ " for this demo."
545
+ ),
546
+ shiny::p(
547
+ "For ",
548
+ shiny::tags$strong("complete privacy"),
549
+ ", consider using local open-weight models via ",
550
+ shiny::tags$a(
551
+ href = "https://ellmer.tidyverse.org/reference/chat_ollama.html",
552
+ target = "_blank",
553
+ shiny::tags$code("chat_ollama()")
554
+ ),
555
+ " from the ",
556
+ shiny::tags$a(
557
+ href = "https://ellmer.tidyverse.org/",
558
+ target = "_blank",
559
+ "ellmer"
560
+ ),
561
+ " library, which connects to ",
562
+ shiny::tags$a(
563
+ href = "https://ollama.com/",
564
+ target = "_blank",
565
+ "Ollama"
566
+ ),
567
+ " for running models locally on your machine."
568
+ ),
569
+ shiny::p(
570
+ "Alternatively, for higher accuracy requirements, users can leverage more performant closed models (e.g., ",
571
+ shiny::tags$em("gpt-5.2-2025-12-11"),
572
+ ", ",
573
+ shiny::tags$em("claude-opus-4-5-20251101"),
574
+ ", or ",
575
+ shiny::tags$em("gemini-3-pro-preview"),
576
+ ") depending on the application needs. ",
577
+ "Note that Claude models require ",
578
+ shiny::tags$a(
579
+ href = "https://ellmer.tidyverse.org/reference/chat_anthropic.html",
580
+ target = "_blank",
581
+ shiny::tags$code("chat_anthropic()")
582
+ ),
583
+ " and Gemini models require ",
584
+ shiny::tags$a(
585
+ href = "https://ellmer.tidyverse.org/reference/chat_google_gemini.html",
586
+ target = "_blank",
587
+ shiny::tags$code("chat_google_gemini()")
588
+ ),
589
+ ", each with their respective API keys configured in the R environment."
590
+ ),
591
+ shiny::p(
592
+ shiny::tags$em(
593
+ "This demo uses ",
594
+ shiny::tags$code("chat_openai()"),
595
+ " only and does not provide an option to change the gpt-5-mini-2025-08-07 model."
596
+ )
597
+ )
598
  )
599
  )
600
+ ),
601
+
602
+ # Footer
603
+ shiny::div(
604
+ class = "app-footer",
605
+ shiny::p(
606
+ "Built with ",
607
+ shiny::tags$a(href = "https://shiny.posit.co/", target = "_blank", "Shiny"),
608
+ " and ",
609
+ shiny::tags$a(href = "https://ellmer.tidyverse.org/", target = "_blank", "ellmer"),
610
+ " | ",
611
+ "Powered by OpenAI"
612
+ ),
613
+ shiny::p(
614
+ shiny::tags$em(
615
+ "Companion app to: 'What Should Quality Engineers Know about Generative AI', submitted by the app's authors to ",
616
+ shiny::tags$a(
617
+ href = "https://www.tandfonline.com/journals/lqen20",
618
+ target = "_blank",
619
+ "Quality Engineering."
620
+ )
621
+ )
622
+ ),
623
+ # shiny::p(
624
+ # "\u00A9 2026 Developed by researchers from Miami University, the University of Dayton, and the University of Amsterdam."
625
+ # )
626
  )
627
  )
628
 
629
  # Define server logic required to generate dynamic UI and extract data
630
  server = function(input, output, session) {
631
+
632
+ # Reactive value to store demo text
633
+ demo_text = shiny::reactiveVal("")
634
+
635
+ # Reactive value to store extracted results (for table display and CSV download)
636
+ extracted_results = shiny::reactiveVal(NULL)
637
+
638
+ # Observer to clear data when switching tabs
639
shiny::observeEvent(
  eventExpr = input$input_method,
  handlerExpr = {
    # A tab switch discards the stored results and blanks the rendered table.
    extracted_results(NULL)
    output$extracted_table = shiny::renderTable(NULL)

    # Entering the paste tab empties the text area so its placeholder shows.
    switching_to_paste = input$input_method == "paste"
    if (switching_to_paste) {
      shiny::updateTextAreaInput(session, inputId = "input_text", value = "")
    }
  },
  ignoreInit = TRUE
)
649
+
650
  # Load example data
651
  shiny::observeEvent(input$load_example, {
652
  example_text = "Ford Motor Company (Ford) is recalling certain 2021-2022 Bronco vehicles equipped with rearview camera systems and 8-inch screen displays. The rearview camera image may still be displayed after a backing event has ended. As such, these vehicles fail to comply with the requirements of Federal Motor Vehicle Safety Standard number 111, \"Rear Visibility.\"\n\nHonda (American Honda Motor Co.) is recalling certain 2022-2025 Acura MDX Type-S, 2023-2025 Honda Pilot, and 2021-2025 Acura TLX Type-S vehicles. A software error in the fuel injection electronic control unit (FI-ECU) may cause an engine stall or a loss of power."
653
+
654
+ # Store demo text
655
+ demo_text(example_text)
656
+
657
+ # Also update the text area for extraction
658
  shiny::updateTextAreaInput(session, "input_text", value = example_text)
659
+
660
  # Set up example fields
661
  shiny::updateNumericInput(session, "num_fields", value = num_example_fields)
662
+
663
+ shiny::showNotification(
664
+ "Demo data loaded! Click 'Extract Data' to see results.",
665
+ type = "message",
666
+ duration = 3
667
+ )
668
  })
669
+
670
+ # Render demo preview
671
+ output$demo_preview_ui = shiny::renderUI({
672
+ text = demo_text()
673
+ if (is.null(text) || nchar(text) == 0) return(NULL)
674
+
675
+ preview_text = if (nchar(text) > 500) {
676
+ paste0(substr(text, 1, 500), "...")
677
+ } else {
678
+ text
679
+ }
680
+
681
+ shiny::div(
682
+ class = "pdf-preview",
683
+ style = "margin-top: 15px;",
684
+ shiny::tags$strong("Loaded Demo Text:"),
685
+ shiny::tags$pre(preview_text)
686
+ )
687
+ })
688
+
689
+ # Reactive value to store PDF extracted text
690
+ pdf_text = shiny::reactiveVal("")
691
+
692
+ # Process uploaded PDF
693
+ shiny::observeEvent(input$pdf_file, {
694
+ shiny::req(input$pdf_file)
695
+
696
+ tryCatch({
697
+ # Extract text from PDF
698
+ pdf_path = input$pdf_file$datapath
699
+ extracted = pdftools::pdf_text(pdf_path)
700
+
701
+ # Limit to MAX_IMAGE_PAGES
702
+ total_pages = length(extracted)
703
+ if (total_pages > MAX_IMAGE_PAGES) {
704
+ extracted = extracted[1:MAX_IMAGE_PAGES]
705
+ shiny::showNotification(
706
+ paste("Only the first", MAX_IMAGE_PAGES, "pages will be processed (free app limit)."),
707
+ type = "warning",
708
+ duration = 5
709
+ )
710
+ }
711
+
712
+ # Combine all pages with double line breaks
713
+ combined_text = paste(extracted, collapse = "\n\n")
714
+
715
+ # Clean up the text (remove excessive whitespace)
716
+ combined_text = gsub("\\s+", " ", combined_text)
717
+ combined_text = trimws(combined_text)
718
+
719
+ # Store the extracted text
720
+ pdf_text(combined_text)
721
+
722
+ # Update the text area with extracted text
723
+ shiny::updateTextAreaInput(session, "input_text", value = combined_text)
724
+
725
+ shiny::showNotification(
726
+ paste("PDF processed successfully!", length(extracted), "page(s) extracted."),
727
+ type = "message",
728
+ duration = 3
729
+ )
730
+ }, error = function(e) {
731
+ shiny::showNotification(
732
+ paste("Error reading PDF:", e$message),
733
+ type = "error",
734
+ duration = NULL
735
+ )
736
+ })
737
+ })
738
+
739
+ # Render PDF text preview
740
+ output$pdf_preview_ui = shiny::renderUI({
741
+ text = pdf_text()
742
+ if (is.null(text) || nchar(text) == 0) return(NULL)
743
+
744
+ preview_text = if (nchar(text) > 500) {
745
+ paste0(substr(text, 1, 500), "...")
746
+ } else {
747
+ text
748
+ }
749
+
750
+ shiny::div(
751
+ class = "pdf-preview",
752
+ shiny::tags$strong("Extracted Text Preview:"),
753
+ shiny::tags$pre(preview_text)
754
+ )
755
+ })
756
+
757
+ # Reactive value for text file content
758
+ textfile_text = shiny::reactiveVal("")
759
+
760
+ # Process uploaded text file
761
+ shiny::observeEvent(input$text_file, {
762
+ shiny::req(input$text_file)
763
+
764
+ tryCatch({
765
+ file_path = input$text_file$datapath
766
+ file_name = input$text_file$name
767
+ file_ext = tolower(tools::file_ext(file_name))
768
+
769
+ extracted_text = ""
770
+
771
+ if (file_ext == "csv") {
772
+ # Read CSV - assume single column or use first column
773
+ csv_data = utils::read.csv(file_path, header = TRUE, stringsAsFactors = FALSE)
774
+ if (ncol(csv_data) >= 1) {
775
+ # Use first column, combine rows with double line breaks
776
+ extracted_text = paste(csv_data[[1]], collapse = "\n\n")
777
+ }
778
+ } else {
779
+ # Read txt or md as plain text
780
+ extracted_text = paste(readLines(file_path, warn = FALSE), collapse = "\n")
781
+ }
782
+
783
+ # Clean up the text
784
+ extracted_text = trimws(extracted_text)
785
+
786
+ # Store the extracted text
787
+ textfile_text(extracted_text)
788
+
789
+ # Update the text area with extracted text
790
+ shiny::updateTextAreaInput(session, "input_text", value = extracted_text)
791
+
792
+ shiny::showNotification(
793
+ paste("File processed successfully!"),
794
+ type = "message",
795
+ duration = 3
796
+ )
797
+ }, error = function(e) {
798
+ shiny::showNotification(
799
+ paste("Error reading file:", e$message),
800
+ type = "error",
801
+ duration = NULL
802
+ )
803
+ })
804
+ })
805
+
806
+ # Render text file preview
807
+ output$textfile_preview_ui = shiny::renderUI({
808
+ text = textfile_text()
809
+ if (is.null(text) || nchar(text) == 0) return(NULL)
810
+
811
+ preview_text = if (nchar(text) > 500) {
812
+ paste0(substr(text, 1, 500), "...")
813
+ } else {
814
+ text
815
+ }
816
+
817
+ shiny::div(
818
+ class = "pdf-preview",
819
+ shiny::tags$strong("Extracted Text Preview:"),
820
+ shiny::tags$pre(preview_text)
821
+ )
822
+ })
823
+
824
+ # Reactive values to store image paths for vision-based extraction
825
+ image_paths = shiny::reactiveVal(NULL)
826
+ scanned_pdf_paths = shiny::reactiveVal(NULL)
827
+
828
+ # Process uploaded images
829
+ shiny::observeEvent(input$image_file, {
830
+ shiny::req(input$image_file)
831
+
832
+ tryCatch({
833
+ files = input$image_file
834
+ # Limit to MAX_IMAGE_PAGES
835
+ n_files = min(nrow(files), MAX_IMAGE_PAGES)
836
+
837
+ if (nrow(files) > MAX_IMAGE_PAGES) {
838
+ shiny::showNotification(
839
+ paste("Only the first", MAX_IMAGE_PAGES, "images will be processed (free app limit)."),
840
+ type = "warning",
841
+ duration = 5
842
+ )
843
+ }
844
+
845
+ # Store paths of uploaded images
846
+ paths = files$datapath[1:n_files]
847
+ image_paths(paths)
848
+
849
+ shiny::showNotification(
850
+ paste(n_files, "image(s) uploaded successfully!"),
851
+ type = "message",
852
+ duration = 3
853
+ )
854
+ }, error = function(e) {
855
+ shiny::showNotification(
856
+ paste("Error uploading images:", e$message),
857
+ type = "error",
858
+ duration = NULL
859
+ )
860
+ })
861
+ })
862
+
863
+ # Render image preview
864
+ output$image_preview_ui = shiny::renderUI({
865
+ paths = image_paths()
866
+ if (is.null(paths) || length(paths) == 0) return(NULL)
867
+
868
+ shiny::tagList(
869
+ shiny::div(
870
+ class = "image-preview-grid",
871
+ lapply(seq_along(paths), function(i) {
872
+ shiny::div(
873
+ class = "image-preview-item",
874
+ shiny::tags$img(
875
+ src = base64enc::dataURI(file = paths[i], mime = "image/png"),
876
+ alt = paste("Image", i)
877
+ )
878
+ )
879
+ })
880
+ ),
881
+ shiny::div(
882
+ class = "image-preview-count",
883
+ paste(length(paths), "image(s) ready for extraction")
884
+ )
885
+ )
886
+ })
887
+
888
+ # Process scanned PDF (convert to images)
889
+ shiny::observeEvent(input$scanned_pdf_file, {
890
+ shiny::req(input$scanned_pdf_file)
891
+
892
+ tryCatch({
893
+ pdf_path = input$scanned_pdf_file$datapath
894
+
895
+ # Get number of pages
896
+ pdf_info = pdftools::pdf_info(pdf_path)
897
+ n_pages = min(pdf_info$pages, MAX_IMAGE_PAGES)
898
+
899
+ if (pdf_info$pages > MAX_IMAGE_PAGES) {
900
+ shiny::showNotification(
901
+ paste("Only the first", MAX_IMAGE_PAGES, "pages will be processed (free app limit)."),
902
+ type = "warning",
903
+ duration = 5
904
+ )
905
+ }
906
+
907
+ shiny::showNotification(
908
+ "Converting PDF pages to images...",
909
+ type = "message",
910
+ duration = NULL,
911
+ id = "convert_notif"
912
+ )
913
+
914
+ # Convert PDF pages to images using magick
915
+ pdf_images = magick::image_read_pdf(pdf_path, pages = 1:n_pages, density = 150)
916
+
917
+ # Save images to temp files
918
+ temp_paths = sapply(1:n_pages, function(i) {
919
+ temp_file = tempfile(fileext = ".png")
920
+ magick::image_write(pdf_images[i], temp_file, format = "png")
921
+ temp_file
922
+ })
923
+
924
+ scanned_pdf_paths(temp_paths)
925
+
926
+ shiny::removeNotification(id = "convert_notif")
927
+ shiny::showNotification(
928
+ paste(n_pages, "page(s) converted successfully!"),
929
+ type = "message",
930
+ duration = 3
931
+ )
932
+ }, error = function(e) {
933
+ shiny::removeNotification(id = "convert_notif")
934
+ shiny::showNotification(
935
+ paste("Error processing scanned PDF:", e$message),
936
+ type = "error",
937
+ duration = NULL
938
+ )
939
+ })
940
+ })
941
+
942
+ # Render scanned PDF preview
943
+ output$scanned_pdf_preview_ui = shiny::renderUI({
944
+ paths = scanned_pdf_paths()
945
+ if (is.null(paths) || length(paths) == 0) return(NULL)
946
+
947
+ shiny::tagList(
948
+ shiny::div(
949
+ class = "image-preview-grid",
950
+ lapply(seq_along(paths), function(i) {
951
+ shiny::div(
952
+ class = "image-preview-item",
953
+ shiny::tags$img(
954
+ src = base64enc::dataURI(file = paths[i], mime = "image/png"),
955
+ alt = paste("Page", i)
956
+ )
957
+ )
958
+ })
959
+ ),
960
+ shiny::div(
961
+ class = "image-preview-count",
962
+ paste(length(paths), "page(s) ready for extraction")
963
+ )
964
+ )
965
+ })
966
+
967
  # Dynamically generate UI elements for each field's label and description
968
  output$fields_ui = shiny::renderUI({
969
  n = input$num_fields
970
+ input_method = input$input_method
971
  if (is.null(n) || n < 1) return(NULL)
972
+ if (is.null(input_method)) input_method = "demo"
973
+
974
+ # Use NHTSA example fields only for demo mode
975
+ if (input_method == "demo") {
976
+ example_labels = c("manufacturer", "defect_summary", "models", "model_years", "component", "fmvss_number", "root_cause", "risk")
977
+ example_descs = c(
978
+ "The name of the company recalling the vehicles.",
979
+ "Summary of the main defect.",
980
+ "List of affected vehicle models.",
981
+ "List of model years affected.",
982
+ "The part or system affected by the defect.",
983
+ "The FMVSS number mentioned, if any.",
984
+ "The root cause of the defect.",
985
+ "The risk or consequence posed by the defect."
986
+ )
987
+ } else {
988
+ # Generic placeholders for non-demo modes
989
+ example_labels = character(0)
990
+ example_descs = character(0)
991
+ }
992
+
993
  fields = purrr::map(1:n, function(i) {
994
+ # Set default values - use examples for demo, generic for others
995
+ if (input_method == "demo" && i <= length(example_labels)) {
996
+ default_label = example_labels[i]
997
+ default_desc = example_descs[i]
998
+ } else {
999
+ default_label = paste0("field_", i)
1000
+ default_desc = "Describe what to extract for this field"
1001
+ }
1002
+
1003
+ shiny::div(
1004
+ style = "background-color: #EDECE2; padding: 12px; border-radius: 6px; margin-bottom: 12px;",
1005
+ shiny::tags$span(
1006
+ style = "color: #C41230; font-weight: 600; font-size: 0.9em;",
1007
+ paste("Field", i)
1008
+ ),
1009
  shiny::textInput(
1010
+ paste0("field_label_", i),
1011
+ "Label:",
1012
  value = default_label
1013
  ),
1014
  shiny::textInput(
1015
+ paste0("field_desc_", i),
1016
+ "Description:",
1017
  value = default_desc
1018
+ )
 
1019
  )
1020
  })
1021
  do.call(shiny::tagList, fields)
1022
  })
1023
+
1024
  # Build a custom type_object based on user-specified fields
1025
  create_type_object = shiny::reactive({
1026
  n = input$num_fields
1027
  if (is.null(n) || n < 1) return(NULL)
1028
+
1029
  # Build a list of field definitions
1030
  type_list = list()
1031
+ for (i in 1:n) {
1032
  label = input[[paste0("field_label_", i)]]
1033
+ desc = input[[paste0("field_desc_", i)]]
1034
+ if (!is.null(label) && label != "") {
1035
  type_list[[label]] = ellmer::type_string(desc, required = FALSE)
1036
  }
1037
  }
1038
  # Dynamically create the type object
1039
  do.call(ellmer::type_object, type_list)
1040
  })
1041
+
1042
  # When the extract button is clicked, perform extraction
1043
  shiny::observeEvent(input$extract_btn, {
1044
+ # Get the current input method
1045
+ input_method = input$input_method
1046
+ if (is.null(input_method)) input_method = "demo"
1047
+
1048
+ # Validate input based on method with helpful error messages
1049
+ has_data = FALSE
1050
+ error_msg = ""
1051
+
1052
+ # Suffix to add to error messages reminding about field configuration
1053
+ field_reminder = " Also, make sure to update the field labels and descriptions for your data."
1054
+
1055
+ if (input_method == "demo") {
1056
+ # Check if demo data was loaded
1057
+ if (is.null(demo_text()) || nchar(demo_text()) == 0) {
1058
+ error_msg = "Please click 'Load Demo Data' first to load the sample data."
1059
+ } else {
1060
+ has_data = TRUE
1061
+ }
1062
+ } else if (input_method == "paste") {
1063
+ # Check if text was pasted
1064
+ if (is.null(input$input_text) || nchar(trimws(input$input_text)) == 0) {
1065
+ error_msg = paste0("Please paste your text content in the text area.", field_reminder)
1066
+ } else {
1067
+ has_data = TRUE
1068
+ }
1069
+ } else if (input_method == "textfile") {
1070
+ # Check if text file was uploaded
1071
+ if (is.null(textfile_text()) || nchar(textfile_text()) == 0) {
1072
+ error_msg = paste0("Please upload a text file (.txt, .csv, or .md) first.", field_reminder)
1073
+ } else {
1074
+ has_data = TRUE
1075
+ }
1076
+ } else if (input_method == "pdf") {
1077
+ # Check if PDF was uploaded
1078
+ if (is.null(pdf_text()) || nchar(pdf_text()) == 0) {
1079
+ error_msg = paste0("Please upload a readable PDF file first.", field_reminder)
1080
+ } else {
1081
+ has_data = TRUE
1082
+ }
1083
+ } else if (input_method == "image") {
1084
+ # Check if images were uploaded
1085
+ if (is.null(image_paths()) || length(image_paths()) == 0) {
1086
+ error_msg = paste0("Please upload one or more image files first.", field_reminder)
1087
+ } else {
1088
+ has_data = TRUE
1089
+ }
1090
+ } else if (input_method == "scanned") {
1091
+ # Check if scanned PDF was uploaded
1092
+ if (is.null(scanned_pdf_paths()) || length(scanned_pdf_paths()) == 0) {
1093
+ error_msg = paste0("Please upload a scanned PDF file first.", field_reminder)
1094
+ } else {
1095
+ has_data = TRUE
1096
+ }
1097
+ }
1098
+
1099
+ # Show error if no data
1100
+ if (!has_data) {
1101
+ shiny::showNotification(
1102
+ shiny::tags$span(
1103
+ shiny::icon("exclamation-triangle"),
1104
+ " ",
1105
+ error_msg
1106
+ ),
1107
+ type = "error",
1108
+ duration = 5
1109
+ )
1110
+ return()
1111
+ }
1112
+
1113
  # Show processing indicator
1114
+ shiny::showNotification(
1115
+ shiny::tags$span(
1116
+ shiny::icon("spinner", class = "fa-spin"),
1117
+ " Processing extraction request..."
1118
+ ),
1119
+ type = "message",
1120
+ duration = NULL,
1121
+ id = "extract_notif"
1122
+ )
1123
+
1124
  custom_type_object = create_type_object()
1125
+
1126
  # Initialize the chat object using the OpenAI API key from your environment
1127
  tryCatch({
1128
  # Check if API key is available
1129
  if (Sys.getenv("OPENAI_API_KEY") == "") {
1130
  stop("OpenAI API key not found. Please set the OPENAI_API_KEY environment variable.")
1131
  }
1132
+
1133
  chat = ellmer::chat_openai(
1134
+ model = "gpt-5-mini-2025-08-07",
1135
+ )
1136
+
 
 
 
 
 
 
 
 
 
 
 
1137
  all_results = list()
1138
+
1139
+ if (input_method %in% c("demo", "paste", "pdf", "textfile")) {
1140
+ # Text-based extraction
1141
+ # Split text by double linebreaks to process multiple entities
1142
+ text_blocks = unlist(strsplit(input$input_text, "\n\n"))
1143
+ text_blocks = text_blocks[text_blocks != ""] # Remove empty blocks
1144
+
1145
+ # Process each text block
1146
+ for (i in seq_along(text_blocks)) {
1147
+ result = chat$chat_structured(text_blocks[i], type = custom_type_object)
1148
+ if (is.list(result)) {
1149
+ result$source_id = i
1150
+ all_results[[i]] = result
1151
+ }
1152
+ }
1153
+
1154
+ } else if (input_method == "image") {
1155
+ # Vision-based extraction from uploaded images
1156
+ paths = image_paths()
1157
+
1158
+ for (i in seq_along(paths)) {
1159
+ # Create image content for the model
1160
+ image_content = ellmer::content_image_file(paths[i], resize = "high")
1161
+ result = chat$chat_structured(image_content, type = custom_type_object)
1162
+ if (is.list(result)) {
1163
+ result$source_id = paste("Image", i)
1164
+ all_results[[i]] = result
1165
+ }
1166
+ }
1167
+
1168
+ } else if (input_method == "scanned") {
1169
+ # Vision-based extraction from scanned PDF pages
1170
+ paths = scanned_pdf_paths()
1171
+
1172
+ for (i in seq_along(paths)) {
1173
+ # Create image content for the model
1174
+ image_content = ellmer::content_image_file(paths[i], resize = "high")
1175
+ result = chat$chat_structured(image_content, type = custom_type_object)
1176
+ if (is.list(result)) {
1177
+ result$source_id = paste("Page", i)
1178
+ all_results[[i]] = result
1179
+ }
1180
  }
1181
  }
1182
+
1183
  # Combine all results into a single data frame
1184
  if (length(all_results) > 0) {
1185
  combined_results = do.call(rbind, lapply(all_results, function(x) {
1186
  # Ensure all results have the same columns by converting to data frame
1187
  as.data.frame(x)
1188
  }))
1189
+
1190
+ # Store results for CSV download
1191
+ extracted_results(combined_results)
1192
+
1193
  # Render the output as a table
1194
+ output$extracted_table = shiny::renderTable(
1195
+ {
1196
+ combined_results
1197
+ },
1198
+ rownames = TRUE,
1199
+ striped = TRUE,
1200
+ hover = TRUE,
1201
+ bordered = TRUE
1202
+ )
1203
  } else {
1204
  # Handle the case when no valid results are returned
1205
+ extracted_results(NULL)
1206
  output$extracted_table = shiny::renderTable({
1207
+ data.frame(Message = "No valid data could be extracted. Please check your input and field definitions.")
1208
  })
1209
  }
1210
+
1211
  # Remove notification
1212
  shiny::removeNotification(id = "extract_notif")
1213
+ shiny::showNotification(
1214
+ shiny::tags$span(
1215
+ shiny::icon("check-circle"),
1216
+ " Extraction complete!"
1217
+ ),
1218
+ type = "message",
1219
+ duration = 3
1220
+ )
1221
  }, error = function(e) {
1222
  # Handle errors
1223
  shiny::removeNotification(id = "extract_notif")
1224
+ shiny::showNotification(
1225
+ shiny::tags$span(
1226
+ shiny::icon("exclamation-triangle"),
1227
+ " Error: ",
1228
+ e$message
1229
+ ),
1230
+ type = "error",
1231
+ duration = NULL
1232
+ )
1233
  })
1234
  })
1235
+
1236
+ # Render download button only when results are available
1237
+ output$download_btn_ui = shiny::renderUI({
1238
+ results = extracted_results()
1239
+ if (is.null(results) || nrow(results) == 0) return(NULL)
1240
+
1241
+ shiny::div(
1242
+ style = "margin-top: 15px;",
1243
+ shiny::downloadButton(
1244
+ "download_csv",
1245
+ "Download as CSV",
1246
+ class = "btn-info"
1247
+ )
1248
+ )
1249
+ })
1250
+
1251
+ # Download handler for CSV export
1252
+ output$download_csv = shiny::downloadHandler(
1253
+ filename = function() {
1254
+ paste0("extracted_data_", format(Sys.time(), "%Y%m%d_%H%M%S"), ".csv")
1255
+ },
1256
+ content = function(file) {
1257
+ results = extracted_results()
1258
+ if (!is.null(results)) {
1259
+ utils::write.csv(results, file, row.names = FALSE)
1260
+ }
1261
+ }
1262
+ )
1263
  }
1264
 
1265
  # Run the Shiny app
1266
+ shiny::shinyApp(ui = ui, server = server)