Spaces:
Sleeping
Sleeping
Updating app.R to version 2.0
Browse files
app.R
CHANGED
|
@@ -1,244 +1,1266 @@
|
|
| 1 |
library(shiny)
|
| 2 |
library(ellmer)
|
| 3 |
library(purrr)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
|
| 5 |
num_example_fields = 2
|
| 6 |
|
|
|
|
|
|
|
| 7 |
# Define UI for the app
|
| 8 |
ui = shiny::fluidPage(
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
)
|
| 26 |
),
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
shiny::
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
)
|
| 46 |
),
|
| 47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
shiny::sidebarLayout(
|
| 49 |
shiny::sidebarPanel(
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
),
|
| 56 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
shiny::numericInput(
|
| 58 |
-
"num_fields",
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
|
|
|
|
|
|
|
|
|
| 62 |
max = 10
|
| 63 |
),
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
shiny::uiOutput("fields_ui"),
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
),
|
| 74 |
-
|
| 75 |
shiny::mainPanel(
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
shiny::tags$ul(
|
| 84 |
-
shiny::tags$li("Use specific field descriptions to guide the AI"),
|
| 85 |
-
shiny::tags$li("Start with more fields and remove unnecessary ones later"),
|
| 86 |
-
shiny::tags$li("If results are inaccurate, try rephrasing your field descriptions"),
|
| 87 |
-
shiny::tags$li("
|
| 88 |
-
shiny::tags$li("Each
|
| 89 |
)
|
| 90 |
),
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
shiny::h4("Note
|
| 95 |
-
shiny::p(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 96 |
)
|
| 97 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
)
|
| 99 |
)
|
| 100 |
|
| 101 |
# Define server logic required to generate dynamic UI and extract data
|
| 102 |
server = function(input, output, session) {
|
| 103 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
# Load example data
|
| 105 |
shiny::observeEvent(input$load_example, {
|
| 106 |
example_text = "Ford Motor Company (Ford) is recalling certain 2021-2022 Bronco vehicles equipped with rearview camera systems and 8-inch screen displays. The rearview camera image may still be displayed after a backing event has ended. As such, these vehicles fail to comply with the requirements of Federal Motor Vehicle Safety Standard number 111, \"Rear Visibility.\"\n\nHonda (American Honda Motor Co.) is recalling certain 2022-2025 Acura MDX Type-S, 2023-2025 Honda Pilot, and 2021-2025 Acura TLX Type-S vehicles. A software error in the fuel injection electronic control unit (FI-ECU) may cause an engine stall or a loss of power."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
shiny::updateTextAreaInput(session, "input_text", value = example_text)
|
| 108 |
-
|
| 109 |
# Set up example fields
|
| 110 |
shiny::updateNumericInput(session, "num_fields", value = num_example_fields)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 111 |
})
|
| 112 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
# Dynamically generate UI elements for each field's label and description
|
| 114 |
output$fields_ui = shiny::renderUI({
|
| 115 |
n = input$num_fields
|
|
|
|
| 116 |
if (is.null(n) || n < 1) return(NULL)
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
"
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 131 |
fields = purrr::map(1:n, function(i) {
|
| 132 |
-
# Set default values
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 137 |
shiny::textInput(
|
| 138 |
-
paste0("field_label_", i),
|
| 139 |
-
|
| 140 |
value = default_label
|
| 141 |
),
|
| 142 |
shiny::textInput(
|
| 143 |
-
paste0("field_desc_", i),
|
| 144 |
-
|
| 145 |
value = default_desc
|
| 146 |
-
)
|
| 147 |
-
shiny::hr()
|
| 148 |
)
|
| 149 |
})
|
| 150 |
do.call(shiny::tagList, fields)
|
| 151 |
})
|
| 152 |
-
|
| 153 |
# Build a custom type_object based on user-specified fields
|
| 154 |
create_type_object = shiny::reactive({
|
| 155 |
n = input$num_fields
|
| 156 |
if (is.null(n) || n < 1) return(NULL)
|
| 157 |
-
|
| 158 |
# Build a list of field definitions
|
| 159 |
type_list = list()
|
| 160 |
-
for(i in 1:n){
|
| 161 |
label = input[[paste0("field_label_", i)]]
|
| 162 |
-
desc
|
| 163 |
-
if (!is.null(label) && label != ""){
|
| 164 |
type_list[[label]] = ellmer::type_string(desc, required = FALSE)
|
| 165 |
}
|
| 166 |
}
|
| 167 |
# Dynamically create the type object
|
| 168 |
do.call(ellmer::type_object, type_list)
|
| 169 |
})
|
| 170 |
-
|
| 171 |
# When the extract button is clicked, perform extraction
|
| 172 |
shiny::observeEvent(input$extract_btn, {
|
| 173 |
-
|
| 174 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 175 |
# Show processing indicator
|
| 176 |
-
shiny::showNotification(
|
| 177 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 178 |
custom_type_object = create_type_object()
|
| 179 |
-
|
| 180 |
# Initialize the chat object using the OpenAI API key from your environment
|
| 181 |
tryCatch({
|
| 182 |
# Check if API key is available
|
| 183 |
if (Sys.getenv("OPENAI_API_KEY") == "") {
|
| 184 |
stop("OpenAI API key not found. Please set the OPENAI_API_KEY environment variable.")
|
| 185 |
}
|
| 186 |
-
|
| 187 |
chat = ellmer::chat_openai(
|
| 188 |
-
model =
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
# Extraction function
|
| 193 |
-
extract_fn = function(x, chat_object, custom_type_object) {
|
| 194 |
-
return(chat_object$extract_data(x, type = custom_type_object))
|
| 195 |
-
}
|
| 196 |
-
|
| 197 |
-
# Split text by double linebreaks to process multiple entities
|
| 198 |
-
text_blocks = unlist(strsplit(input$input_text, "\n\n"))
|
| 199 |
-
text_blocks = text_blocks[text_blocks != ""] # Remove empty blocks
|
| 200 |
-
|
| 201 |
-
# Process each text block
|
| 202 |
all_results = list()
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 210 |
}
|
| 211 |
}
|
| 212 |
-
|
| 213 |
# Combine all results into a single data frame
|
| 214 |
if (length(all_results) > 0) {
|
| 215 |
combined_results = do.call(rbind, lapply(all_results, function(x) {
|
| 216 |
# Ensure all results have the same columns by converting to data frame
|
| 217 |
as.data.frame(x)
|
| 218 |
}))
|
| 219 |
-
|
|
|
|
|
|
|
|
|
|
| 220 |
# Render the output as a table
|
| 221 |
-
output$extracted_table = shiny::renderTable(
|
| 222 |
-
|
| 223 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 224 |
} else {
|
| 225 |
# Handle the case when no valid results are returned
|
|
|
|
| 226 |
output$extracted_table = shiny::renderTable({
|
| 227 |
-
data.frame(
|
| 228 |
})
|
| 229 |
}
|
| 230 |
-
|
| 231 |
# Remove notification
|
| 232 |
shiny::removeNotification(id = "extract_notif")
|
| 233 |
-
shiny::showNotification(
|
| 234 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 235 |
}, error = function(e) {
|
| 236 |
# Handle errors
|
| 237 |
shiny::removeNotification(id = "extract_notif")
|
| 238 |
-
shiny::showNotification(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 239 |
})
|
| 240 |
})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 241 |
}
|
| 242 |
|
| 243 |
# Run the Shiny app
|
| 244 |
-
shiny::shinyApp(ui = ui, server = server)
|
|
|
|
| 1 |
library(shiny)
|
| 2 |
library(ellmer)
|
| 3 |
library(purrr)
|
| 4 |
+
library(pdftools)
|
| 5 |
+
library(magick)
|
| 6 |
+
library(base64enc)
|
| 7 |
+
|
| 8 |
+
# Maximum pages/images processed per upload for readable PDF, scanned PDF, and image inputs (free app limit)
|
| 9 |
+
MAX_IMAGE_PAGES = 5
|
| 10 |
|
| 11 |
num_example_fields = 2
|
| 12 |
|
| 13 |
+
|
| 14 |
+
|
| 15 |
# Define UI for the app
|
| 16 |
ui = shiny::fluidPage(
|
| 17 |
+
# Add Miami theme CSS and favicon
|
| 18 |
+
shiny::tags$head(
|
| 19 |
+
shiny::tags$link(rel = "icon", type = "image/svg+xml", href = "favicon.svg"),
|
| 20 |
+
shiny::tags$link(rel = "stylesheet", type = "text/css", href = "miami-theme.css"),
|
| 21 |
+
shiny::tags$title("AI-Powered Text Extraction Tool")
|
| 22 |
+
),
|
| 23 |
+
|
| 24 |
+
# Custom header with Miami branding
|
| 25 |
+
shiny::div(
|
| 26 |
+
class = "app-header",
|
| 27 |
+
shiny::div(
|
| 28 |
+
class = "header-content",
|
| 29 |
+
shiny::div(
|
| 30 |
+
class = "header-left",
|
| 31 |
+
shiny::tags$h1("AI-Powered Text Extraction Tool"),
|
| 32 |
+
shiny::p(class = "subtitle", "Extract Structured Data from Text, Documents, and Images")
|
| 33 |
+
),
|
| 34 |
+
shiny::div(
|
| 35 |
+
class = "header-right",
|
| 36 |
+
shiny::p("Version 2.0 | January 2026"),
|
| 37 |
+
shiny::p(
|
| 38 |
+
shiny::tags$strong("Authors: "),
|
| 39 |
+
"Fadel M. Megahed, Ying-Ju (Tessa) Chen, Allison Jones-Farmer, Ibrahim Yousif, and Inez M. Zwetsloot"
|
| 40 |
+
),
|
| 41 |
+
shiny::p(
|
| 42 |
+
shiny::tags$strong("Contact: "),
|
| 43 |
+
shiny::tags$a(
|
| 44 |
+
href = "mailto:fmegahed@miamioh.edu",
|
| 45 |
+
style = "color: #EFDB72;",
|
| 46 |
+
"fmegahed@miamioh.edu"
|
| 47 |
+
)
|
| 48 |
+
)
|
| 49 |
+
)
|
| 50 |
+
)
|
| 51 |
+
),
|
| 52 |
+
|
| 53 |
+
# University logos section
|
| 54 |
+
shiny::div(
|
| 55 |
+
class = "logo-container",
|
| 56 |
+
shiny::tags$img(
|
| 57 |
+
src = "miami-logo.png",
|
| 58 |
+
alt = "Miami University Logo",
|
| 59 |
+
style = "height: 55px;"
|
| 60 |
+
),
|
| 61 |
+
shiny::div(class = "logo-divider"),
|
| 62 |
+
shiny::tags$img(
|
| 63 |
+
src = "university-of-dayton-vector-logo.png",
|
| 64 |
+
alt = "University of Dayton Logo",
|
| 65 |
+
style = "height: 50px;"
|
| 66 |
+
),
|
| 67 |
+
shiny::div(class = "logo-divider"),
|
| 68 |
+
shiny::tags$img(
|
| 69 |
+
src = "uva-compacte-logo.png",
|
| 70 |
+
alt = "University of Amsterdam Logo",
|
| 71 |
+
style = "height: 50px;"
|
| 72 |
+
),
|
| 73 |
+
shiny::div(
|
| 74 |
+
style = "margin-left: auto; font-size: 0.85em; color: #666;",
|
| 75 |
+
shiny::p(
|
| 76 |
+
style = "margin: 0;",
|
| 77 |
+
"A collaboration between",
|
| 78 |
+
shiny::tags$strong("Miami University,"),
|
| 79 |
+
"the",
|
| 80 |
+
shiny::tags$strong("University of Dayton,"),
|
| 81 |
+
"and the",
|
| 82 |
+
shiny::tags$strong("University of Amsterdam")
|
| 83 |
+
)
|
| 84 |
+
)
|
| 85 |
+
),
|
| 86 |
+
|
| 87 |
+
# How to use section
|
| 88 |
+
shiny::div(
|
| 89 |
+
class = "how-to-use",
|
| 90 |
+
shiny::div(
|
| 91 |
+
style = "display: flex; justify-content: space-between; align-items: center; margin-bottom: 10px;",
|
| 92 |
+
shiny::tags$h4(
|
| 93 |
+
style = "margin: 0;",
|
| 94 |
+
shiny::icon("circle-info"),
|
| 95 |
+
" How to Use This App"
|
| 96 |
+
),
|
| 97 |
+
shiny::tags$button(
|
| 98 |
+
id = "open-video-modal",
|
| 99 |
+
class = "btn btn-video-tutorial",
|
| 100 |
+
shiny::icon("play-circle"),
|
| 101 |
+
" Watch Video Tutorial"
|
| 102 |
+
)
|
| 103 |
+
),
|
| 104 |
+
shiny::tags$h5(
|
| 105 |
+
style = "color: #C41230; margin-top: 0;",
|
| 106 |
+
shiny::icon("bolt"),
|
| 107 |
+
" Quick Demo"
|
| 108 |
+
),
|
| 109 |
+
shiny::p(
|
| 110 |
+
"Click the ",
|
| 111 |
+
shiny::tags$strong("'Load Examples'"),
|
| 112 |
+
" button to load two sample NHTSA recall notices with pre-configured extraction fields. ",
|
| 113 |
+
"Then click ",
|
| 114 |
+
shiny::tags$strong("'Extract Data'"),
|
| 115 |
+
" to see the AI extract structured information from the text. ",
|
| 116 |
+
"You can also add more fields, change field labels, and modify descriptions. ",
|
| 117 |
+
shiny::tags$em(
|
| 118 |
+
"(Note: Field labels only affect how data is stored and displayed, not extraction performance.)"
|
| 119 |
+
)
|
| 120 |
+
),
|
| 121 |
+
shiny::tags$h5(
|
| 122 |
+
style = "color: #C41230; margin-top: 15px;",
|
| 123 |
+
shiny::icon("file-import"),
|
| 124 |
+
" Input Methods"
|
| 125 |
+
),
|
| 126 |
+
shiny::p(
|
| 127 |
+
"Choose from six input methods to provide your source content:"
|
| 128 |
+
),
|
| 129 |
+
shiny::tags$ul(
|
| 130 |
+
style = "margin-bottom: 10px;",
|
| 131 |
+
shiny::tags$li(
|
| 132 |
+
shiny::tags$strong("Demo Data:"),
|
| 133 |
+
" Try the app with pre-loaded NHTSA vehicle recall examples."
|
| 134 |
+
),
|
| 135 |
+
shiny::tags$li(
|
| 136 |
+
shiny::tags$strong("Paste Text:"),
|
| 137 |
+
" Directly paste text content. Separate multiple items with double line breaks."
|
| 138 |
+
),
|
| 139 |
+
shiny::tags$li(
|
| 140 |
+
shiny::tags$strong("Text File:"),
|
| 141 |
+
" Upload .txt, .csv (single column), or .md files."
|
| 142 |
+
),
|
| 143 |
+
shiny::tags$li(
|
| 144 |
+
shiny::tags$strong("Readable PDF:"),
|
| 145 |
+
" Upload a machine-readable PDF. Text is extracted automatically."
|
| 146 |
+
),
|
| 147 |
+
shiny::tags$li(
|
| 148 |
+
shiny::tags$strong("Scanned PDF:"),
|
| 149 |
+
" Upload scanned/image-based PDFs. Pages are converted to images for vision processing."
|
| 150 |
+
),
|
| 151 |
+
shiny::tags$li(
|
| 152 |
+
shiny::tags$strong("Upload Images:"),
|
| 153 |
+
" Upload images (PNG, JPEG, WebP, GIF) for AI vision-based extraction."
|
| 154 |
+
)
|
| 155 |
+
),
|
| 156 |
+
shiny::p(
|
| 157 |
+
style = "font-size: 0.9em; color: #666;",
|
| 158 |
+
shiny::icon("info-circle"),
|
| 159 |
+
" ",
|
| 160 |
+
shiny::tags$em(
|
| 161 |
+
"Note: PDF, Image, and Scanned PDF modes are limited to 5 pages/items in this free demo."
|
| 162 |
+
)
|
| 163 |
+
),
|
| 164 |
+
shiny::tags$h5(
|
| 165 |
+
style = "color: #C41230; margin-top: 15px;",
|
| 166 |
+
shiny::icon("edit"),
|
| 167 |
+
" Custom Fields"
|
| 168 |
+
),
|
| 169 |
+
shiny::p(
|
| 170 |
+
"Customize your extraction by adjusting:"
|
| 171 |
+
),
|
| 172 |
+
shiny::tags$ul(
|
| 173 |
+
style = "margin-bottom: 10px;",
|
| 174 |
+
shiny::tags$li("The ", shiny::tags$strong("number of fields"), " to extract"),
|
| 175 |
+
shiny::tags$li("Your own ", shiny::tags$strong("field labels"), " (for storage/display only)"),
|
| 176 |
+
shiny::tags$li("Detailed ", shiny::tags$strong("field descriptions"), " to guide the AI")
|
| 177 |
+
),
|
| 178 |
+
shiny::p(
|
| 179 |
+
shiny::icon("lightbulb"),
|
| 180 |
+
" ",
|
| 181 |
+
shiny::tags$em(
|
| 182 |
+
"The more specific your field descriptions, the better the extraction results."
|
| 183 |
+
)
|
| 184 |
)
|
| 185 |
),
|
| 186 |
+
|
| 187 |
+
# Video Modal
|
| 188 |
+
shiny::div(
|
| 189 |
+
id = "video-modal",
|
| 190 |
+
class = "video-modal",
|
| 191 |
+
shiny::div(
|
| 192 |
+
class = "video-modal-content",
|
| 193 |
+
shiny::tags$span(
|
| 194 |
+
id = "close-video-modal",
|
| 195 |
+
class = "video-modal-close",
|
| 196 |
+
shiny::HTML("×")
|
| 197 |
+
),
|
| 198 |
+
shiny::tags$h4(
|
| 199 |
+
style = "color: #C41230; margin-top: 0; margin-bottom: 15px;",
|
| 200 |
+
shiny::icon("play-circle"),
|
| 201 |
+
" Video Tutorial"
|
| 202 |
+
),
|
| 203 |
+
shiny::div(
|
| 204 |
+
class = "video-modal-wrapper",
|
| 205 |
+
# Placeholder iframe: src stays empty until the modal is opened; replace VIDEO_ID_PLACEHOLDER in the videoUrl of the modal script with the actual YouTube video ID
|
| 206 |
+
shiny::tags$iframe(
|
| 207 |
+
id = "tutorial-video",
|
| 208 |
+
src = "",
|
| 209 |
+
allow = "accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture",
|
| 210 |
+
allowfullscreen = NA
|
| 211 |
+
)
|
| 212 |
+
)
|
| 213 |
)
|
| 214 |
),
|
| 215 |
+
|
| 216 |
+
# JavaScript for video modal
|
| 217 |
+
shiny::tags$script(shiny::HTML("
|
| 218 |
+
var videoUrl = 'https://www.youtube.com/embed/VIDEO_ID_PLACEHOLDER';
|
| 219 |
+
var modal = document.getElementById('video-modal');
|
| 220 |
+
var openBtn = document.getElementById('open-video-modal');
|
| 221 |
+
var closeBtn = document.getElementById('close-video-modal');
|
| 222 |
+
var videoIframe = document.getElementById('tutorial-video');
|
| 223 |
+
|
| 224 |
+
openBtn.onclick = function() {
|
| 225 |
+
modal.style.display = 'flex';
|
| 226 |
+
videoIframe.src = videoUrl;
|
| 227 |
+
}
|
| 228 |
+
|
| 229 |
+
closeBtn.onclick = function() {
|
| 230 |
+
modal.style.display = 'none';
|
| 231 |
+
videoIframe.src = '';
|
| 232 |
+
}
|
| 233 |
+
|
| 234 |
+
window.onclick = function(event) {
|
| 235 |
+
if (event.target == modal) {
|
| 236 |
+
modal.style.display = 'none';
|
| 237 |
+
videoIframe.src = '';
|
| 238 |
+
}
|
| 239 |
+
}
|
| 240 |
+
|
| 241 |
+
document.addEventListener('keydown', function(event) {
|
| 242 |
+
if (event.key === 'Escape' && modal.style.display === 'flex') {
|
| 243 |
+
modal.style.display = 'none';
|
| 244 |
+
videoIframe.src = '';
|
| 245 |
+
}
|
| 246 |
+
});
|
| 247 |
+
|
| 248 |
+
// Input tab switching
|
| 249 |
+
function switchInputTab(tab) {
|
| 250 |
+
var tabs = ['demo', 'paste', 'textfile', 'pdf', 'scanned', 'image'];
|
| 251 |
+
tabs.forEach(function(t) {
|
| 252 |
+
var tabEl = document.getElementById('tab-' + t);
|
| 253 |
+
var panelEl = document.getElementById('panel-' + t);
|
| 254 |
+
if (t === tab) {
|
| 255 |
+
tabEl.classList.add('active');
|
| 256 |
+
panelEl.style.display = 'block';
|
| 257 |
+
} else {
|
| 258 |
+
tabEl.classList.remove('active');
|
| 259 |
+
panelEl.style.display = 'none';
|
| 260 |
+
}
|
| 261 |
+
});
|
| 262 |
+
// Update Shiny with the current input method
|
| 263 |
+
Shiny.setInputValue('input_method', tab);
|
| 264 |
+
}
|
| 265 |
+
// Initialize input method
|
| 266 |
+
Shiny.setInputValue('input_method', 'demo');
|
| 267 |
+
")),
|
| 268 |
+
|
| 269 |
shiny::sidebarLayout(
|
| 270 |
shiny::sidebarPanel(
|
| 271 |
+
width = 4,
|
| 272 |
+
class = "sidebar-panel",
|
| 273 |
+
|
| 274 |
+
shiny::tags$h4(
|
| 275 |
+
style = "color: #C41230; margin-top: 0; margin-bottom: 15px;",
|
| 276 |
+
shiny::icon("file-alt"),
|
| 277 |
+
" Input Configuration"
|
| 278 |
+
),
|
| 279 |
+
|
| 280 |
+
# Input method tabs (3 rows of 2 tabs each, 6 tabs total)
|
| 281 |
+
shiny::div(
|
| 282 |
+
class = "input-tabs-container",
|
| 283 |
+
shiny::div(
|
| 284 |
+
class = "input-tabs",
|
| 285 |
+
shiny::tags$button(
|
| 286 |
+
id = "tab-demo",
|
| 287 |
+
class = "input-tab active",
|
| 288 |
+
onclick = "switchInputTab('demo')",
|
| 289 |
+
shiny::icon("flask"),
|
| 290 |
+
" Demo Data"
|
| 291 |
+
),
|
| 292 |
+
shiny::tags$button(
|
| 293 |
+
id = "tab-paste",
|
| 294 |
+
class = "input-tab",
|
| 295 |
+
onclick = "switchInputTab('paste')",
|
| 296 |
+
shiny::icon("paste"),
|
| 297 |
+
" Paste Text"
|
| 298 |
+
)
|
| 299 |
+
),
|
| 300 |
+
shiny::div(
|
| 301 |
+
class = "input-tabs",
|
| 302 |
+
shiny::tags$button(
|
| 303 |
+
id = "tab-textfile",
|
| 304 |
+
class = "input-tab",
|
| 305 |
+
onclick = "switchInputTab('textfile')",
|
| 306 |
+
shiny::icon("file-lines"),
|
| 307 |
+
" Text File"
|
| 308 |
+
),
|
| 309 |
+
shiny::tags$button(
|
| 310 |
+
id = "tab-pdf",
|
| 311 |
+
class = "input-tab",
|
| 312 |
+
onclick = "switchInputTab('pdf')",
|
| 313 |
+
shiny::icon("file-pdf"),
|
| 314 |
+
" Readable PDF"
|
| 315 |
+
)
|
| 316 |
+
),
|
| 317 |
+
shiny::div(
|
| 318 |
+
class = "input-tabs",
|
| 319 |
+
shiny::tags$button(
|
| 320 |
+
id = "tab-scanned",
|
| 321 |
+
class = "input-tab",
|
| 322 |
+
onclick = "switchInputTab('scanned')",
|
| 323 |
+
shiny::icon("file-image"),
|
| 324 |
+
" Scanned PDF"
|
| 325 |
+
),
|
| 326 |
+
shiny::tags$button(
|
| 327 |
+
id = "tab-image",
|
| 328 |
+
class = "input-tab",
|
| 329 |
+
onclick = "switchInputTab('image')",
|
| 330 |
+
shiny::icon("images"),
|
| 331 |
+
" Upload Images"
|
| 332 |
+
)
|
| 333 |
+
)
|
| 334 |
),
|
| 335 |
+
|
| 336 |
+
# Demo data panel
|
| 337 |
+
shiny::div(
|
| 338 |
+
id = "panel-demo",
|
| 339 |
+
class = "input-panel",
|
| 340 |
+
shiny::p(
|
| 341 |
+
"Try the app with pre-loaded NHTSA vehicle recall data. Click the button below to load sample text and pre-configured extraction fields."
|
| 342 |
+
),
|
| 343 |
+
shiny::actionButton(
|
| 344 |
+
"load_example",
|
| 345 |
+
shiny::tags$span(shiny::icon("play"), " Load Demo Data"),
|
| 346 |
+
class = "btn-primary",
|
| 347 |
+
style = "margin-top: 10px;"
|
| 348 |
+
),
|
| 349 |
+
shiny::uiOutput("demo_preview_ui")
|
| 350 |
+
),
|
| 351 |
+
|
| 352 |
+
# Paste text panel
|
| 353 |
+
shiny::div(
|
| 354 |
+
id = "panel-paste",
|
| 355 |
+
class = "input-panel",
|
| 356 |
+
style = "display: none;",
|
| 357 |
+
shiny::textAreaInput(
|
| 358 |
+
"input_text",
|
| 359 |
+
NULL,
|
| 360 |
+
rows = 8,
|
| 361 |
+
placeholder = "Paste your text content here.\n\nTo process multiple items at once, separate each block of text with a double line break (press Enter twice).\n\nIMPORTANT: Before extracting, scroll down to configure your extraction fields:\n1. Set the number of fields you need\n2. Update each field label (e.g., 'product_name', 'price')\n3. Write clear descriptions for each field\n\nThen click 'Extract Data' to see results."
|
| 362 |
+
)
|
| 363 |
+
),
|
| 364 |
+
|
| 365 |
+
# Text file upload panel
|
| 366 |
+
shiny::div(
|
| 367 |
+
id = "panel-textfile",
|
| 368 |
+
class = "input-panel",
|
| 369 |
+
style = "display: none;",
|
| 370 |
+
shiny::fileInput(
|
| 371 |
+
"text_file",
|
| 372 |
+
NULL,
|
| 373 |
+
accept = c(".txt", ".csv", ".md"),
|
| 374 |
+
placeholder = "No file selected",
|
| 375 |
+
buttonLabel = shiny::tags$span(shiny::icon("upload"), " Browse...")
|
| 376 |
+
),
|
| 377 |
+
shiny::helpText(
|
| 378 |
+
shiny::icon("info-circle"),
|
| 379 |
+
" Upload a text file (.txt, .csv, .md). Content will be extracted automatically."
|
| 380 |
+
),
|
| 381 |
+
shiny::div(
|
| 382 |
+
class = "file-format-note",
|
| 383 |
+
shiny::tags$strong("File format guidelines:"),
|
| 384 |
+
shiny::tags$ul(
|
| 385 |
+
style = "margin: 5px 0 0 0; padding-left: 20px; font-size: 0.85em;",
|
| 386 |
+
shiny::tags$li(".txt: Plain text, separate items with double line breaks"),
|
| 387 |
+
shiny::tags$li(".csv: Single column of text entries (one per row)"),
|
| 388 |
+
shiny::tags$li(".md: Markdown file treated as plain text")
|
| 389 |
+
)
|
| 390 |
+
),
|
| 391 |
+
shiny::uiOutput("textfile_preview_ui")
|
| 392 |
+
),
|
| 393 |
+
|
| 394 |
+
# PDF upload panel (machine-readable)
|
| 395 |
+
shiny::div(
|
| 396 |
+
id = "panel-pdf",
|
| 397 |
+
class = "input-panel",
|
| 398 |
+
style = "display: none;",
|
| 399 |
+
shiny::fileInput(
|
| 400 |
+
"pdf_file",
|
| 401 |
+
NULL,
|
| 402 |
+
accept = ".pdf",
|
| 403 |
+
placeholder = "No file selected",
|
| 404 |
+
buttonLabel = shiny::tags$span(shiny::icon("upload"), " Browse...")
|
| 405 |
+
),
|
| 406 |
+
shiny::helpText(
|
| 407 |
+
shiny::icon("info-circle"),
|
| 408 |
+
" Upload a machine-readable PDF. Text will be extracted automatically."
|
| 409 |
+
),
|
| 410 |
+
shiny::div(
|
| 411 |
+
class = "image-limit-note",
|
| 412 |
+
shiny::icon("exclamation-circle"),
|
| 413 |
+
shiny::tags$em(paste(" Free app limit: Only the first", MAX_IMAGE_PAGES, "pages will be processed."))
|
| 414 |
+
),
|
| 415 |
+
shiny::uiOutput("pdf_preview_ui")
|
| 416 |
+
),
|
| 417 |
+
|
| 418 |
+
# Image upload panel
|
| 419 |
+
shiny::div(
|
| 420 |
+
id = "panel-image",
|
| 421 |
+
class = "input-panel",
|
| 422 |
+
style = "display: none;",
|
| 423 |
+
shiny::fileInput(
|
| 424 |
+
"image_file",
|
| 425 |
+
NULL,
|
| 426 |
+
accept = c(".png", ".jpg", ".jpeg", ".webp", ".gif"),
|
| 427 |
+
multiple = TRUE,
|
| 428 |
+
placeholder = "No file selected",
|
| 429 |
+
buttonLabel = shiny::tags$span(shiny::icon("upload"), " Browse...")
|
| 430 |
+
),
|
| 431 |
+
shiny::helpText(
|
| 432 |
+
shiny::icon("info-circle"),
|
| 433 |
+
" Upload image(s) containing text to extract. Supports PNG, JPEG, WebP, and GIF."
|
| 434 |
+
),
|
| 435 |
+
shiny::div(
|
| 436 |
+
class = "image-limit-note",
|
| 437 |
+
shiny::icon("exclamation-circle"),
|
| 438 |
+
shiny::tags$em(paste(" Free app limit: Maximum", MAX_IMAGE_PAGES, "images will be processed."))
|
| 439 |
+
),
|
| 440 |
+
shiny::uiOutput("image_preview_ui")
|
| 441 |
+
),
|
| 442 |
+
|
| 443 |
+
# Scanned PDF panel
|
| 444 |
+
shiny::div(
|
| 445 |
+
id = "panel-scanned",
|
| 446 |
+
class = "input-panel",
|
| 447 |
+
style = "display: none;",
|
| 448 |
+
shiny::fileInput(
|
| 449 |
+
"scanned_pdf_file",
|
| 450 |
+
NULL,
|
| 451 |
+
accept = ".pdf",
|
| 452 |
+
placeholder = "No file selected",
|
| 453 |
+
buttonLabel = shiny::tags$span(shiny::icon("upload"), " Browse...")
|
| 454 |
+
),
|
| 455 |
+
shiny::helpText(
|
| 456 |
+
shiny::icon("info-circle"),
|
| 457 |
+
" Upload a scanned/image-based PDF. Pages will be converted to images for AI vision processing."
|
| 458 |
+
),
|
| 459 |
+
shiny::div(
|
| 460 |
+
class = "image-limit-note",
|
| 461 |
+
shiny::icon("exclamation-circle"),
|
| 462 |
+
shiny::tags$em(paste(" Free app limit: Only the first", MAX_IMAGE_PAGES, "pages will be processed."))
|
| 463 |
+
),
|
| 464 |
+
shiny::uiOutput("scanned_pdf_preview_ui")
|
| 465 |
+
),
|
| 466 |
+
|
| 467 |
+
shiny::hr(class = "field-separator"),
|
| 468 |
+
|
| 469 |
shiny::numericInput(
|
| 470 |
+
"num_fields",
|
| 471 |
+
shiny::tags$span(
|
| 472 |
+
shiny::icon("list-ol"),
|
| 473 |
+
" Number of Fields to Extract:"
|
| 474 |
+
),
|
| 475 |
+
value = num_example_fields,
|
| 476 |
+
min = 1,
|
| 477 |
max = 10
|
| 478 |
),
|
| 479 |
+
|
| 480 |
+
shiny::helpText(
|
| 481 |
+
shiny::icon("info-circle"),
|
| 482 |
+
" Define each field with a clear label (e.g., 'manufacturer') and description (e.g., 'The name of the company recalling the vehicles')."
|
| 483 |
+
),
|
| 484 |
+
|
| 485 |
+
shiny::hr(class = "field-separator"),
|
| 486 |
+
|
| 487 |
+
shiny::tags$h5(
|
| 488 |
+
style = "color: #C41230; margin-bottom: 10px;",
|
| 489 |
+
shiny::icon("tags"),
|
| 490 |
+
" Field Definitions"
|
| 491 |
+
),
|
| 492 |
+
|
| 493 |
shiny::uiOutput("fields_ui"),
|
| 494 |
+
|
| 495 |
+
shiny::div(
|
| 496 |
+
style = "margin-top: 20px;",
|
| 497 |
+
shiny::actionButton(
|
| 498 |
+
"extract_btn",
|
| 499 |
+
shiny::tags$span(shiny::icon("magic"), " Extract Data"),
|
| 500 |
+
class = "btn-primary"
|
| 501 |
+
)
|
| 502 |
+
)
|
| 503 |
),
|
| 504 |
+
|
| 505 |
shiny::mainPanel(
|
| 506 |
+
width = 8,
|
| 507 |
+
class = "main-panel",
|
| 508 |
+
|
| 509 |
+
shiny::div(
|
| 510 |
+
class = "info-card",
|
| 511 |
+
shiny::tags$h3(
|
| 512 |
+
class = "section-heading",
|
| 513 |
+
shiny::icon("table"),
|
| 514 |
+
" Structured Data Extracted Using AI"
|
| 515 |
+
),
|
| 516 |
+
shiny::div(
|
| 517 |
+
class = "results-placeholder",
|
| 518 |
+
id = "results-placeholder",
|
| 519 |
+
shiny::icon("arrow-left"),
|
| 520 |
+
" Configure your extraction fields and click 'Extract Data' to see results here"
|
| 521 |
+
),
|
| 522 |
+
shiny::tableOutput("extracted_table"),
|
| 523 |
+
shiny::uiOutput("download_btn_ui")
|
| 524 |
+
),
|
| 525 |
+
|
| 526 |
+
shiny::div(
|
| 527 |
+
class = "tips-section",
|
| 528 |
+
shiny::tags$h4(shiny::icon("lightbulb"), " Tips for Better Results"),
|
| 529 |
shiny::tags$ul(
|
| 530 |
+
shiny::tags$li("Use ", shiny::tags$strong("specific field descriptions"), " to guide the AI accurately"),
|
| 531 |
+
shiny::tags$li("Start with ", shiny::tags$strong("more fields"), " and remove unnecessary ones later"),
|
| 532 |
+
shiny::tags$li("If results are inaccurate, try ", shiny::tags$strong("rephrasing"), " your field descriptions"),
|
| 533 |
+
shiny::tags$li("For ", shiny::tags$strong("multiple text blocks"), ", separate each with a double line break"),
|
| 534 |
+
shiny::tags$li("Each text block should contain ", shiny::tags$strong("complete information"), " for all fields you want to extract")
|
| 535 |
)
|
| 536 |
),
|
| 537 |
+
|
| 538 |
+
shiny::div(
|
| 539 |
+
class = "note-section",
|
| 540 |
+
shiny::tags$h4(shiny::icon("cog"), " Technical Note"),
|
| 541 |
+
shiny::p(
|
| 542 |
+
"To ensure timely results (since this is hosted on a CPU), we utilize ",
|
| 543 |
+
shiny::tags$code("gpt-5-mini-2025-08-07"),
|
| 544 |
+
" for this demo."
|
| 545 |
+
),
|
| 546 |
+
shiny::p(
|
| 547 |
+
"For ",
|
| 548 |
+
shiny::tags$strong("complete privacy"),
|
| 549 |
+
", consider using local open-weight models via ",
|
| 550 |
+
shiny::tags$a(
|
| 551 |
+
href = "https://ellmer.tidyverse.org/reference/chat_ollama.html",
|
| 552 |
+
target = "_blank",
|
| 553 |
+
shiny::tags$code("chat_ollama()")
|
| 554 |
+
),
|
| 555 |
+
" from the ",
|
| 556 |
+
shiny::tags$a(
|
| 557 |
+
href = "https://ellmer.tidyverse.org/",
|
| 558 |
+
target = "_blank",
|
| 559 |
+
"ellmer"
|
| 560 |
+
),
|
| 561 |
+
" library, which connects to ",
|
| 562 |
+
shiny::tags$a(
|
| 563 |
+
href = "https://ollama.com/",
|
| 564 |
+
target = "_blank",
|
| 565 |
+
"Ollama"
|
| 566 |
+
),
|
| 567 |
+
" for running models locally on your machine."
|
| 568 |
+
),
|
| 569 |
+
shiny::p(
|
| 570 |
+
"Alternatively, for higher accuracy requirements, users can leverage more performant closed models (e.g., ",
|
| 571 |
+
shiny::tags$em("gpt-5.2-2025-12-11"),
|
| 572 |
+
", ",
|
| 573 |
+
shiny::tags$em("claude-opus-4-5-20251101"),
|
| 574 |
+
", or ",
|
| 575 |
+
shiny::tags$em("gemini-3-pro-preview"),
|
| 576 |
+
") depending on the application needs. ",
|
| 577 |
+
"Note that Claude models require ",
|
| 578 |
+
shiny::tags$a(
|
| 579 |
+
href = "https://ellmer.tidyverse.org/reference/chat_anthropic.html",
|
| 580 |
+
target = "_blank",
|
| 581 |
+
shiny::tags$code("chat_anthropic()")
|
| 582 |
+
),
|
| 583 |
+
" and Gemini models require ",
|
| 584 |
+
shiny::tags$a(
|
| 585 |
+
href = "https://ellmer.tidyverse.org/reference/chat_google_gemini.html",
|
| 586 |
+
target = "_blank",
|
| 587 |
+
shiny::tags$code("chat_google_gemini()")
|
| 588 |
+
),
|
| 589 |
+
", each with their respective API keys configured in the R environment."
|
| 590 |
+
),
|
| 591 |
+
shiny::p(
|
| 592 |
+
shiny::tags$em(
|
| 593 |
+
"This demo uses ",
|
| 594 |
+
shiny::tags$code("chat_openai()"),
|
| 595 |
+
" only and does not provide an option to change the gpt-5-mini-2025-08-07 model."
|
| 596 |
+
)
|
| 597 |
+
)
|
| 598 |
)
|
| 599 |
)
|
| 600 |
+
),
|
| 601 |
+
|
| 602 |
+
# Footer
|
| 603 |
+
shiny::div(
|
| 604 |
+
class = "app-footer",
|
| 605 |
+
shiny::p(
|
| 606 |
+
"Built with ",
|
| 607 |
+
shiny::tags$a(href = "https://shiny.posit.co/", target = "_blank", "Shiny"),
|
| 608 |
+
" and ",
|
| 609 |
+
shiny::tags$a(href = "https://ellmer.tidyverse.org/", target = "_blank", "ellmer"),
|
| 610 |
+
" | ",
|
| 611 |
+
"Powered by OpenAI"
|
| 612 |
+
),
|
| 613 |
+
shiny::p(
|
| 614 |
+
shiny::tags$em(
|
| 615 |
+
"Companion app to: 'What Should Quality Engineers Know about Generative AI', submitted by the app's authors to ",
|
| 616 |
+
shiny::tags$a(
|
| 617 |
+
href = "https://www.tandfonline.com/journals/lqen20",
|
| 618 |
+
target = "_blank",
|
| 619 |
+
"Quality Engineering."
|
| 620 |
+
)
|
| 621 |
+
)
|
| 622 |
+
),
|
| 623 |
+
# shiny::p(
|
| 624 |
+
# "\u00A9 2026 Developed by researchers from Miami University, the University of Dayton, and the University of Amsterdam."
|
| 625 |
+
# )
|
| 626 |
)
|
| 627 |
)
|
| 628 |
|
| 629 |
# Define server logic required to generate dynamic UI and extract data
|
| 630 |
server = function(input, output, session) {
|
| 631 |
+
|
| 632 |
+
# Session state shared by the handlers below:
#   demo_text         - the sample text loaded by the demo button ("" until loaded)
#   extracted_results - the most recent extraction result (NULL when there is none);
#                       feeds both the results table and the CSV download
demo_text = shiny::reactiveVal("")
extracted_results = shiny::reactiveVal(NULL)

# Whenever the user moves to another input tab, discard the previous results so
# a table produced from a different source is never left on screen.
shiny::observeEvent(
  input$input_method,
  ignoreInit = TRUE,
  handlerExpr = {
    extracted_results(NULL)
    output$extracted_table = shiny::renderTable(NULL)

    # Empty the text area on arrival at the paste tab so its placeholder shows.
    switched_to_paste = input$input_method == "paste"
    if (switched_to_paste) {
      shiny::updateTextAreaInput(session, "input_text", value = "")
    }
  }
)
|
| 649 |
+
|
| 650 |
# Demo button: load two sample recall notices and reset the field count.
shiny::observeEvent(input$load_example, {
  # The two notices are separated by a blank line, which is the block separator
  # the extraction step splits on.
  recall_notices = c(
    "Ford Motor Company (Ford) is recalling certain 2021-2022 Bronco vehicles equipped with rearview camera systems and 8-inch screen displays. The rearview camera image may still be displayed after a backing event has ended. As such, these vehicles fail to comply with the requirements of Federal Motor Vehicle Safety Standard number 111, \"Rear Visibility.\"",
    "Honda (American Honda Motor Co.) is recalling certain 2022-2025 Acura MDX Type-S, 2023-2025 Honda Pilot, and 2021-2025 Acura TLX Type-S vehicles. A software error in the fuel injection electronic control unit (FI-ECU) may cause an engine stall or a loss of power."
  )
  example_text = paste(recall_notices, collapse = "\n\n")

  # Keep a server-side copy (drives the demo preview) and mirror it into the
  # text area input.
  demo_text(example_text)
  shiny::updateTextAreaInput(session, "input_text", value = example_text)

  # Restore the default number of example fields.
  shiny::updateNumericInput(session, "num_fields", value = num_example_fields)

  shiny::showNotification(
    "Demo data loaded! Click 'Extract Data' to see results.",
    type = "message",
    duration = 3
  )
})
|
| 669 |
+
|
| 670 |
+
# Preview of the loaded demo text, truncated to the first 500 characters.
# Renders nothing until demo data has been loaded.
output$demo_preview_ui = shiny::renderUI({
  loaded = demo_text()
  if (is.null(loaded) || nchar(loaded) == 0) return(NULL)

  shown = loaded
  if (nchar(loaded) > 500) {
    shown = paste0(substr(loaded, 1, 500), "...")
  }

  shiny::div(
    class = "pdf-preview",
    style = "margin-top: 15px;",
    shiny::tags$strong("Loaded Demo Text:"),
    shiny::tags$pre(shown)
  )
})
|
| 688 |
+
|
| 689 |
+
# Text extracted from the most recently uploaded (text-based) PDF; "" when no
# usable text is available.
pdf_text = shiny::reactiveVal("")

# Read an uploaded PDF and keep its text, one block per page.
#
# Pages beyond MAX_IMAGE_PAGES are ignored (with a warning). Whitespace is
# normalised *per page* and the pages are then joined with a blank line, so the
# page boundaries survive as the "\n\n" block separator used by the extraction
# step. (Normalising after joining would collapse the separators themselves and
# merge every page into a single block.)
shiny::observeEvent(input$pdf_file, {
  shiny::req(input$pdf_file)

  tryCatch({
    page_texts = pdftools::pdf_text(input$pdf_file$datapath)

    if (length(page_texts) > MAX_IMAGE_PAGES) {
      page_texts = page_texts[seq_len(MAX_IMAGE_PAGES)]
      shiny::showNotification(
        paste("Only the first", MAX_IMAGE_PAGES, "pages will be processed (free app limit)."),
        type = "warning",
        duration = 5
      )
    }
    n_pages_read = length(page_texts)

    # Collapse runs of whitespace inside each page, then drop pages that turn
    # out to be empty so they do not produce empty extraction blocks.
    page_texts = trimws(gsub("\\s+", " ", page_texts))
    page_texts = page_texts[nzchar(page_texts)]
    combined_text = paste(page_texts, collapse = "\n\n")

    # Store the text and mirror it into the text area input.
    pdf_text(combined_text)
    shiny::updateTextAreaInput(session, "input_text", value = combined_text)

    if (nzchar(combined_text)) {
      shiny::showNotification(
        paste("PDF processed successfully!", n_pages_read, "page(s) extracted."),
        type = "message",
        duration = 3
      )
    } else {
      # No text layer at all: do not claim success.
      shiny::showNotification(
        "No readable text was found in this PDF. If it is a scanned document, upload it as a scanned PDF instead.",
        type = "warning",
        duration = 8
      )
    }
  }, error = function(e) {
    shiny::showNotification(
      paste("Error reading PDF:", e$message),
      type = "error",
      duration = NULL
    )
  })
})
|
| 738 |
+
|
| 739 |
+
# Preview of the text pulled from the uploaded PDF (first 500 characters).
# Renders nothing while no PDF text is stored.
output$pdf_preview_ui = shiny::renderUI({
  stored = pdf_text()
  if (is.null(stored) || nchar(stored) == 0) return(NULL)

  shown = stored
  if (nchar(stored) > 500) {
    shown = paste0(substr(stored, 1, 500), "...")
  }

  shiny::div(
    class = "pdf-preview",
    shiny::tags$strong("Extracted Text Preview:"),
    shiny::tags$pre(shown)
  )
})
|
| 756 |
+
|
| 757 |
+
# Contents of the most recently uploaded text file ("" until one is loaded).
textfile_text = shiny::reactiveVal("")

# Read an uploaded .txt/.md/.csv file into a single string.
#   - CSV: the first column is used; its rows are joined with blank lines so
#     each row becomes its own block.
#   - anything else: the lines are read verbatim and joined with "\n".
shiny::observeEvent(input$text_file, {
  shiny::req(input$text_file)

  tryCatch({
    upload = input$text_file
    source_path = upload$datapath
    extension = tolower(tools::file_ext(upload$name))

    contents = if (extension == "csv") {
      table_data = utils::read.csv(source_path, header = TRUE, stringsAsFactors = FALSE)
      if (ncol(table_data) >= 1) {
        paste(table_data[[1]], collapse = "\n\n")
      } else {
        ""
      }
    } else {
      paste(readLines(source_path, warn = FALSE), collapse = "\n")
    }

    # Strip leading/trailing whitespace before storing.
    contents = trimws(contents)

    # Keep a server-side copy and mirror it into the text area input.
    textfile_text(contents)
    shiny::updateTextAreaInput(session, "input_text", value = contents)

    shiny::showNotification(
      "File processed successfully!",
      type = "message",
      duration = 3
    )
  }, error = function(e) {
    shiny::showNotification(
      paste("Error reading file:", e$message),
      type = "error",
      duration = NULL
    )
  })
})
|
| 805 |
+
|
| 806 |
+
# Preview of the uploaded text file's contents (first 500 characters).
# Renders nothing while no file text is stored.
output$textfile_preview_ui = shiny::renderUI({
  stored = textfile_text()
  if (is.null(stored) || nchar(stored) == 0) return(NULL)

  shown = stored
  if (nchar(stored) > 500) {
    shown = paste0(substr(stored, 1, 500), "...")
  }

  shiny::div(
    class = "pdf-preview",
    shiny::tags$strong("Extracted Text Preview:"),
    shiny::tags$pre(shown)
  )
})
|
| 823 |
+
|
| 824 |
+
# File paths used for vision-based extraction (NULL until something is loaded):
#   image_paths       - uploaded image files
#   scanned_pdf_paths - page images rendered from a scanned PDF
image_paths = shiny::reactiveVal(NULL)
scanned_pdf_paths = shiny::reactiveVal(NULL)

# Remember the paths of the uploaded images, keeping at most MAX_IMAGE_PAGES.
shiny::observeEvent(input$image_file, {
  shiny::req(input$image_file)

  tryCatch({
    uploads = input$image_file
    n_uploaded = nrow(uploads)
    n_files = min(n_uploaded, MAX_IMAGE_PAGES)

    # Tell the user when part of the upload is being ignored.
    if (n_uploaded > MAX_IMAGE_PAGES) {
      shiny::showNotification(
        paste("Only the first", MAX_IMAGE_PAGES, "images will be processed (free app limit)."),
        type = "warning",
        duration = 5
      )
    }

    kept_paths = uploads$datapath[1:n_files]
    image_paths(kept_paths)

    shiny::showNotification(
      paste(n_files, "image(s) uploaded successfully!"),
      type = "message",
      duration = 3
    )
  }, error = function(e) {
    shiny::showNotification(
      paste("Error uploading images:", e$message),
      type = "error",
      duration = NULL
    )
  })
})
|
| 862 |
+
|
| 863 |
+
# Thumbnail grid for the uploaded images, plus a count line.
# Renders nothing until at least one image has been uploaded.
#
# Each image is inlined as a base64 data URI. The MIME type is derived from the
# file extension instead of always claiming "image/png", so JPEG/GIF/WebP
# uploads are labelled correctly; unknown extensions fall back to "image/png".
output$image_preview_ui = shiny::renderUI({
  paths = image_paths()
  if (is.null(paths) || length(paths) == 0) return(NULL)

  # Map a file path to an image MIME type based on its extension.
  # NOTE(review): relies on the upload's temp path keeping the original file
  # extension - confirm for the Shiny version in use.
  mime_for = function(path) {
    switch(
      tolower(tools::file_ext(path)),
      jpg = ,
      jpeg = "image/jpeg",
      gif = "image/gif",
      webp = "image/webp",
      bmp = "image/bmp",
      tif = ,
      tiff = "image/tiff",
      "image/png"
    )
  }

  thumbnails = lapply(seq_along(paths), function(i) {
    shiny::div(
      class = "image-preview-item",
      shiny::tags$img(
        src = base64enc::dataURI(file = paths[i], mime = mime_for(paths[i])),
        alt = paste("Image", i)
      )
    )
  })

  shiny::tagList(
    shiny::div(
      class = "image-preview-grid",
      thumbnails
    ),
    shiny::div(
      class = "image-preview-count",
      paste(length(paths), "image(s) ready for extraction")
    )
  )
})
|
| 887 |
+
|
| 888 |
+
# Convert an uploaded scanned PDF into one PNG per page for vision extraction.
#
# At most MAX_IMAGE_PAGES pages are rendered (150 dpi). The resulting temp file
# paths are stored in scanned_pdf_paths(). Temp files are cleaned up:
#   - the previous upload's page images are deleted once a new set is stored;
#   - pages written before a failure are deleted if the conversion errors out;
#   - whatever is left is deleted when the session ends.
shiny::observeEvent(input$scanned_pdf_file, {
  shiny::req(input$scanned_pdf_file)

  # Tracked outside tryCatch so the error handler can clean up partial output.
  new_paths = character(0)
  stored = FALSE

  tryCatch({
    pdf_path = input$scanned_pdf_file$datapath

    # Decide how many pages to render.
    total_pages = pdftools::pdf_info(pdf_path)$pages
    n_pages = min(total_pages, MAX_IMAGE_PAGES)
    if (n_pages < 1) {
      stop("The PDF does not contain any pages.")
    }

    if (total_pages > MAX_IMAGE_PAGES) {
      shiny::showNotification(
        paste("Only the first", MAX_IMAGE_PAGES, "pages will be processed (free app limit)."),
        type = "warning",
        duration = 5
      )
    }

    shiny::showNotification(
      "Converting PDF pages to images...",
      type = "message",
      duration = NULL,
      id = "convert_notif"
    )

    # Render the selected pages, then write each one to its own temp PNG.
    pdf_images = magick::image_read_pdf(pdf_path, pages = seq_len(n_pages), density = 150)
    for (i in seq_len(n_pages)) {
      page_file = tempfile(fileext = ".png")
      new_paths = c(new_paths, page_file)
      magick::image_write(pdf_images[i], page_file, format = "png")
    }

    # Swap in the new page images and delete the ones they replace.
    old_paths = scanned_pdf_paths()
    scanned_pdf_paths(new_paths)
    stored = TRUE
    if (length(old_paths) > 0) {
      unlink(old_paths)
    }

    shiny::removeNotification(id = "convert_notif")
    shiny::showNotification(
      paste(n_pages, "page(s) converted successfully!"),
      type = "message",
      duration = 3
    )
  }, error = function(e) {
    # Remove pages written before the failure, unless they are already in use.
    if (!stored && length(new_paths) > 0) {
      unlink(new_paths)
    }
    shiny::removeNotification(id = "convert_notif")
    shiny::showNotification(
      paste("Error processing scanned PDF:", e$message),
      type = "error",
      duration = NULL
    )
  })
})

# Delete any remaining page images when the session closes.
session$onSessionEnded(function() {
  leftover_paths = shiny::isolate(scanned_pdf_paths())
  if (length(leftover_paths) > 0) {
    unlink(leftover_paths)
  }
})
|
| 941 |
+
|
| 942 |
+
# Thumbnail grid for the pages rendered from the scanned PDF, plus a count line.
# Renders nothing until a scanned PDF has been converted.
output$scanned_pdf_preview_ui = shiny::renderUI({
  page_files = scanned_pdf_paths()
  if (length(page_files) == 0) return(NULL)

  # One <img> per page; the page images are PNG files, inlined as data URIs.
  page_thumbnail = function(i) {
    encoded = base64enc::dataURI(file = page_files[i], mime = "image/png")
    shiny::div(
      class = "image-preview-item",
      shiny::tags$img(src = encoded, alt = paste("Page", i))
    )
  }
  thumbnails = purrr::map(seq_along(page_files), page_thumbnail)

  count_label = paste(length(page_files), "page(s) ready for extraction")

  shiny::tagList(
    shiny::div(class = "image-preview-grid", thumbnails),
    shiny::div(class = "image-preview-count", count_label)
  )
})
|
| 966 |
+
|
| 967 |
# Build one label/description input pair per requested field.
#
# Inputs are named field_label_<i> and field_desc_<i>. In demo mode the first
# fields are pre-filled with the NHTSA recall examples; in every other mode (and
# beyond the example list) generic placeholders are used.
#
# Renders nothing when the field count is missing, NA (which is what a cleared
# numeric input yields) or below 1 - testing `n < 1` on NA would otherwise raise
# "missing value where TRUE/FALSE needed".
output$fields_ui = shiny::renderUI({
  n = input$num_fields
  input_method = input$input_method
  if (is.null(n) || length(n) != 1 || is.na(n) || n < 1) return(NULL)
  if (is.null(input_method)) input_method = "demo"

  # Use NHTSA example fields only for demo mode
  if (input_method == "demo") {
    example_labels = c("manufacturer", "defect_summary", "models", "model_years", "component", "fmvss_number", "root_cause", "risk")
    example_descs = c(
      "The name of the company recalling the vehicles.",
      "Summary of the main defect.",
      "List of affected vehicle models.",
      "List of model years affected.",
      "The part or system affected by the defect.",
      "The FMVSS number mentioned, if any.",
      "The root cause of the defect.",
      "The risk or consequence posed by the defect."
    )
  } else {
    # Generic placeholders for non-demo modes
    example_labels = character(0)
    example_descs = character(0)
  }

  # floor() keeps the count an integer if a fractional number was typed.
  fields = purrr::map(seq_len(floor(n)), function(i) {
    # Example text while it lasts (demo mode only), generic defaults afterwards.
    if (i <= length(example_labels)) {
      default_label = example_labels[i]
      default_desc = example_descs[i]
    } else {
      default_label = paste0("field_", i)
      default_desc = "Describe what to extract for this field"
    }

    shiny::div(
      style = "background-color: #EDECE2; padding: 12px; border-radius: 6px; margin-bottom: 12px;",
      shiny::tags$span(
        style = "color: #C41230; font-weight: 600; font-size: 0.9em;",
        paste("Field", i)
      ),
      shiny::textInput(
        paste0("field_label_", i),
        "Label:",
        value = default_label
      ),
      shiny::textInput(
        paste0("field_desc_", i),
        "Description:",
        value = default_desc
      )
    )
  })
  do.call(shiny::tagList, fields)
})
|
| 1023 |
+
|
| 1024 |
# Build the ellmer type_object describing the user-defined fields.
#
# Each field with a non-blank label becomes an optional string property whose
# description is the text the user entered. Labels are trimmed, so a label made
# only of whitespace is skipped rather than turned into a field name.
#
# Returns NULL when the field count is missing, NA (a cleared numeric input) or
# below 1; otherwise the result of ellmer::type_object() over the collected
# fields.
create_type_object = shiny::reactive({
  n = input$num_fields
  if (is.null(n) || length(n) != 1 || is.na(n) || n < 1) return(NULL)

  # Collect the field definitions, keyed by label.
  type_list = list()
  for (i in seq_len(floor(n))) {
    label = input[[paste0("field_label_", i)]]
    desc = input[[paste0("field_desc_", i)]]

    # Inputs that are not rendered yet come back as NULL.
    if (is.null(label)) next
    label = trimws(label)
    if (!nzchar(label)) next

    type_list[[label]] = ellmer::type_string(desc, required = FALSE)
  }

  # Dynamically create the type object
  do.call(ellmer::type_object, type_list)
})
|
| 1041 |
+
|
| 1042 |
# Run the extraction when the "Extract Data" button is clicked.
#
# Steps:
#   1. Resolve the data for the active input method and validate it. Text modes
#      read the value that belongs to that mode (demo / pasted / text file /
#      PDF) rather than whatever happens to be in the shared text area, which is
#      cleared on tab switches and overwritten by every loader.
#   2. Split text into blocks on blank lines, or take one image per item.
#   3. Send every item to the model with the user-defined type object. Each item
#      goes through a fresh clone of the chat so earlier items are not carried
#      along as conversation history.
#   4. Bind the per-item results into one data frame. Fields the model left
#      empty (NULL) become NA so every row has the same columns.
#   5. Publish the table and store the data for the CSV download.
# All failures after validation are reported through an error notification.
shiny::observeEvent(input$extract_btn, {
  # Get the current input method
  input_method = input$input_method
  if (is.null(input_method)) input_method = "demo"

  # Suffix to add to error messages reminding about field configuration
  field_reminder = " Also, make sure to update the field labels and descriptions for your data."

  # TRUE when there is no usable text.
  is_blank = function(txt) {
    is.null(txt) || length(txt) == 0 || !nzchar(trimws(txt[1]))
  }

  # --- 1. Resolve and validate the source for the active method --------------
  source_text = NULL
  source_paths = NULL
  error_msg = ""

  if (input_method == "demo") {
    source_text = demo_text()
    if (is_blank(source_text)) {
      error_msg = "Please click 'Load Demo Data' first to load the sample data."
    }
  } else if (input_method == "paste") {
    source_text = input$input_text
    if (is_blank(source_text)) {
      error_msg = paste0("Please paste your text content in the text area.", field_reminder)
    }
  } else if (input_method == "textfile") {
    source_text = textfile_text()
    if (is_blank(source_text)) {
      error_msg = paste0("Please upload a text file (.txt, .csv, or .md) first.", field_reminder)
    }
  } else if (input_method == "pdf") {
    source_text = pdf_text()
    if (is_blank(source_text)) {
      error_msg = paste0("Please upload a readable PDF file first.", field_reminder)
    }
  } else if (input_method == "image") {
    source_paths = image_paths()
    if (length(source_paths) == 0) {
      error_msg = paste0("Please upload one or more image files first.", field_reminder)
    }
  } else if (input_method == "scanned") {
    source_paths = scanned_pdf_paths()
    if (length(source_paths) == 0) {
      error_msg = paste0("Please upload a scanned PDF file first.", field_reminder)
    }
  } else {
    # Previously an unknown method produced an empty error notification.
    error_msg = "Unsupported input method selected."
  }

  if (nzchar(error_msg)) {
    shiny::showNotification(
      shiny::tags$span(
        shiny::icon("exclamation-triangle"),
        " ",
        error_msg
      ),
      type = "error",
      duration = 5
    )
    return()
  }

  # Show processing indicator
  shiny::showNotification(
    shiny::tags$span(
      shiny::icon("spinner", class = "fa-spin"),
      " Processing extraction request..."
    ),
    type = "message",
    duration = NULL,
    id = "extract_notif"
  )

  tryCatch({
    custom_type_object = create_type_object()
    if (is.null(custom_type_object)) {
      stop("Please set the number of fields to extract to at least 1.")
    }

    # Check if API key is available
    if (Sys.getenv("OPENAI_API_KEY") == "") {
      stop("OpenAI API key not found. Please set the OPENAI_API_KEY environment variable.")
    }

    chat = ellmer::chat_openai(
      model = "gpt-5-mini-2025-08-07"
    )

    # Extract one item; returns the result list tagged with its source id, or
    # NULL when the model did not return a list.
    extract_one = function(content, source_id) {
      result = chat$clone()$chat_structured(content, type = custom_type_object)
      if (!is.list(result)) return(NULL)
      result$source_id = source_id
      result
    }

    # --- 2./3. Run the extraction per block / per image -----------------------
    if (input_method %in% c("image", "scanned")) {
      # Vision-based extraction: one request per image or page.
      id_prefix = if (input_method == "image") "Image" else "Page"
      all_results = lapply(seq_along(source_paths), function(i) {
        image_content = ellmer::content_image_file(source_paths[i], resize = "high")
        extract_one(image_content, paste(id_prefix, i))
      })
    } else {
      # Text-based extraction: blank lines (also with \r\n line endings or
      # stray spaces) separate the blocks; empty blocks are dropped.
      text_blocks = trimws(unlist(strsplit(source_text, "\n\\s*\n")))
      text_blocks = text_blocks[nzchar(text_blocks)]
      all_results = lapply(seq_along(text_blocks), function(i) {
        extract_one(text_blocks[i], i)
      })
    }
    # Drop items without a usable result so no NULL gaps reach the binding step.
    all_results = Filter(Negate(is.null), all_results)

    # --- 4./5. Combine and publish ---------------------------------------------
    if (length(all_results) > 0) {
      # Every row gets the union of all returned fields; missing or NULL values
      # become NA, multi-valued answers are joined with "; ".
      column_names = unique(unlist(lapply(all_results, names)))
      result_rows = lapply(all_results, function(res) {
        cells = lapply(column_names, function(col) {
          value = unlist(res[[col]])
          if (length(value) == 0) {
            NA
          } else if (length(value) == 1) {
            value
          } else {
            paste(value, collapse = "; ")
          }
        })
        names(cells) = column_names
        as.data.frame(cells, stringsAsFactors = FALSE)
      })
      combined_results = do.call(rbind, result_rows)

      # Store results for CSV download
      extracted_results(combined_results)

      # Render the output as a table
      output$extracted_table = shiny::renderTable(
        {
          combined_results
        },
        rownames = TRUE,
        striped = TRUE,
        hover = TRUE,
        bordered = TRUE
      )
    } else {
      # Handle the case when no valid results are returned
      extracted_results(NULL)
      output$extracted_table = shiny::renderTable({
        data.frame(Message = "No valid data could be extracted. Please check your input and field definitions.")
      })
    }

    # Remove notification
    shiny::removeNotification(id = "extract_notif")
    shiny::showNotification(
      shiny::tags$span(
        shiny::icon("check-circle"),
        " Extraction complete!"
      ),
      type = "message",
      duration = 3
    )
  }, error = function(e) {
    # Handle errors
    shiny::removeNotification(id = "extract_notif")
    shiny::showNotification(
      shiny::tags$span(
        shiny::icon("exclamation-triangle"),
        " Error: ",
        e$message
      ),
      type = "error",
      duration = NULL
    )
  })
})
|
| 1235 |
+
|
| 1236 |
+
# The CSV download button is only shown once there is at least one result row.
output$download_btn_ui = shiny::renderUI({
  current = extracted_results()
  nothing_to_download = is.null(current) || nrow(current) == 0
  if (nothing_to_download) return(NULL)

  csv_button = shiny::downloadButton(
    "download_csv",
    "Download as CSV",
    class = "btn-info"
  )
  shiny::div(style = "margin-top: 15px;", csv_button)
})
|
| 1250 |
+
|
| 1251 |
+
# CSV export of the stored results. The file name carries a timestamp
# (extracted_data_YYYYMMDD_HHMMSS.csv); nothing is written when no results are
# stored.
output$download_csv = shiny::downloadHandler(
  filename = function() {
    stamp = format(Sys.time(), "%Y%m%d_%H%M%S")
    paste0("extracted_data_", stamp, ".csv")
  },
  content = function(file) {
    current = extracted_results()
    if (is.null(current)) return(invisible(NULL))
    utils::write.csv(current, file, row.names = FALSE)
  }
)
|
| 1263 |
}
|
| 1264 |
|
| 1265 |
# Launch the application from the UI definition and server function above.
shiny::shinyApp(ui, server)
|