nghweigeok committed on
Commit
69c77f5
·
verified ·
1 Parent(s): 9afd3ef

Upload required 4 files

Browse files
Files changed (4) hide show
  1. Dockerfile +34 -0
  2. app.py +107 -0
  3. process_data.R +26 -0
  4. requirements.txt +6 -0
Dockerfile ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Base image: rocker/r-ver provides a pinned R installation.
# Python 3 and pip are NOT part of this image; they are installed below.
FROM rocker/r-ver:4.1.0

# Install Python, pip, and system dependencies for the Python packages.
# Dependencies being installed:
# - python3-pip, python3-dev: Python tooling used to install and run the app
# - libfreetype6-dev, libxft-dev: matplotlib
# - libpcre2-dev (-lpcre2-8), liblzma-dev (-llzma), libbz2-dev (-lbz2),
#   libicu-dev (-licuuc, -licui18n): rpy2 compilation requirements
RUN apt-get update && apt-get install -y \
    python3-pip \
    python3-dev \
    libfreetype6-dev \
    libxft-dev \
    libpcre2-dev \
    liblzma-dev \
    libbz2-dev \
    libicu-dev \
    && rm -rf /var/lib/apt/lists/*

# Install R packages over HTTPS. install.packages() only warns when a package
# fails to build, so verify the result explicitly to make the image build fail
# instead of producing an image that breaks at runtime.
RUN R -e "pkgs <- c('relaimpo', 'readxl'); install.packages(pkgs, repos='https://cran.rstudio.com/'); stopifnot(all(pkgs %in% rownames(installed.packages())))"

# Install Python dependencies first so this layer stays cached when only the
# application sources change.
COPY requirements.txt /requirements.txt
RUN pip3 install --no-cache-dir -r /requirements.txt

# Copy the app script and the R script into the container.
COPY app.py /app.py
COPY process_data.R /process_data.R

# Expose the port Gradio runs on.
EXPOSE 7860

# Command to run the app.
CMD ["python3", "/app.py"]
app.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Import libraries
2
+ import subprocess
3
+ import pandas as pd
4
+ import matplotlib.pyplot as plt
5
+ import seaborn as sns
6
+ import gradio as gr
7
+ import tempfile
8
+ import logging
9
+ from PIL import Image
10
+ import io
11
+
12
+ logging.basicConfig(level=logging.INFO)
13
+
14
+
15
def analyze_excel(file_path):
    """Run the R relative-importance analysis on an Excel file and plot it.

    The function invokes ``process_data.R`` through ``Rscript``, passing the
    input file plus two output paths (a text report and a CSV). It then reads
    both outputs back and renders the CSV as a horizontal bar chart.

    Args:
        file_path: Path to the uploaded Excel file. A string or path-like
            object is used as-is; any other object exposing a ``name``
            attribute (e.g. a temporary-file wrapper) is resolved through
            that attribute.

    Returns:
        A ``(image, text)`` tuple. On success ``image`` is a PIL image of the
        bar chart and ``text`` is the content of the text report written by
        the R script. On failure ``image`` is ``None`` and ``text`` is a
        user-facing error message.
    """
    # Local import: keeps the new dependency inside this function.
    import os

    error_message = "Error executing R script. Please check the input file format."

    if file_path is None:
        logging.error("No input file was provided.")
        return None, "No file uploaded. Please upload an Excel file."

    # Accept both plain paths and file-like wrappers that carry the path in
    # ``.name``. Path-like objects are excluded from the ``.name`` lookup
    # because ``pathlib.Path.name`` is only the basename.
    if isinstance(file_path, (str, os.PathLike)):
        input_path = os.fspath(file_path)
    else:
        input_path = getattr(file_path, "name", file_path)

    # Prefer the R script that sits next to this module so the call does not
    # depend on the current working directory; fall back to the original
    # relative path otherwise.
    script_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "process_data.R"
    )
    if not os.path.exists(script_path):
        script_path = "process_data.R"

    # A private temporary directory gives the R script unique output paths
    # and guarantees both files are removed when the block exits.
    with tempfile.TemporaryDirectory() as work_dir:
        text_output_path = os.path.join(work_dir, "relaimpo_output.txt")
        csv_output_path = os.path.join(work_dir, "relaimpo_output.csv")

        # Call the R script, passing paths for both outputs
        command = [
            "Rscript",
            script_path,
            input_path,
            text_output_path,
            csv_output_path,
        ]

        try:
            subprocess.run(command, check=True, capture_output=True, text=True)
        except subprocess.CalledProcessError as e:
            # Include R's stderr so the log shows why the script failed.
            logging.error("R script failed with error: %s\n%s", e, e.stderr)
            return None, error_message
        except OSError as e:
            # Raised when the Rscript executable itself cannot be started.
            logging.error("Could not start Rscript: %s", e)
            return None, error_message

        # Read the detailed textual output
        with open(text_output_path, "r") as file:
            calc_relaimpo_text = file.read()

        # Read the CSV data into a DataFrame for plotting
        results_df = pd.read_csv(csv_output_path)

    # Create a new column for plotting with values Importance * 100
    results_df["Importance_percent"] = results_df["Importance"] * 100

    # Plot through the Figure/Axes objects instead of the global pyplot
    # state, so the calls always target this figure.
    fig, ax = plt.subplots(figsize=(12, 10))
    try:
        sns.barplot(
            x="Importance_percent",
            y="Predictor",
            data=results_df,
            color="skyblue",
            ax=ax,
        )

        # Add percentage annotations to the bars
        for index, value in enumerate(results_df["Importance_percent"]):
            ax.text(value + 1, index, f"{value:.2f}%", va="center", fontsize=8)

        ax.set_xlabel("Relative Importance (%)")
        ax.set_ylabel("Predictors")
        ax.set_title("Relative Importance Ranking of Predictors (LMG)")

        # Adjust the x-axis limit to create space for annotations
        ax.set_xlim(0, results_df["Importance_percent"].max() + 10)

        fig.tight_layout()

        # Convert plot to image data
        img_data = io.BytesIO()
        fig.savefig(img_data, format="png")
    finally:
        # Close the figure to free memory, even if plotting failed.
        plt.close(fig)

    img_data.seek(0)
    img = Image.open(img_data)

    # Return both the image and the detailed textual output
    return img, calc_relaimpo_text
82
+
83
+
84
# Markdown shown as the interface description above the upload widget.
instruction_text = """
## Instructions

Please upload an Excel file with your data. The file should contain columns for Stability, Development, Relationship, Benefit, Vision, and Competence, along with Trust as the target variable.

The app will analyze the data, calculate the relative importance of predictors using Shapley values, and display the results.

**Note:** The analysis may take a few seconds. Please wait for the results to be displayed.
"""

# Gradio interface: one file upload in, the plot image and the text report out.
iface = gr.Interface(
    fn=analyze_excel,
    inputs=["file"],
    outputs=[
        gr.Image(type="pil", label="Shapley Regression Plot"),
        gr.Textbox(label="Calculation Output"),
    ],
    title="TrustLogic: Trust Driver Analysis",
    description=instruction_text,
    allow_flagging="never",
)

# Start the server only when run as a script, so importing this module
# (e.g. for testing) does not launch a web server as a side effect.
if __name__ == "__main__":
    # Bind to all interfaces so the app is reachable from outside the container.
    iface.launch(server_name="0.0.0.0")
process_data.R ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Relative-importance analysis of trust drivers.
#
# Usage: Rscript process_data.R <input_excel> <output_text> <output_csv>
#   input_excel  Excel file containing the Trust column and the six predictors
#   output_text  path that receives the printed calc.relimp report
#   output_csv   path that receives a Predictor/Importance table for plotting
library(readxl)
library(relaimpo)

args <- commandArgs(trailingOnly = TRUE)
if (length(args) != 3) {
  # Fail fast with a clear message; Rscript exits non-zero on stop().
  stop("Usage: Rscript process_data.R <input_excel> <output_text> <output_csv>",
       call. = FALSE)
}
input_file <- args[1]
output_text_file <- args[2]
output_csv_file <- args[3]

# Columns referenced by the model formula below.
required_columns <- c("Trust", "Stability", "Development", "Relationship",
                      "Benefit", "Vision", "Competence")

# Read the data
survey_data <- read_excel(input_file)

# Report missing columns explicitly instead of letting lm() fail with an
# "object not found" error.
missing_columns <- setdiff(required_columns, names(survey_data))
if (length(missing_columns) > 0) {
  stop("Input file is missing required column(s): ",
       paste(missing_columns, collapse = ", "),
       call. = FALSE)
}

# Fit the model
model <- lm(Trust ~ Stability + Development + Relationship + Benefit + Vision + Competence,
            data = survey_data)

# Calculate relative importance (LMG metric; rela = TRUE normalizes the
# shares so that they sum to 1)
calc_relaimpo <- calc.relimp(model, type = "lmg", rela = TRUE)

# Write the detailed output to a text file
full_output <- capture.output(print(calc_relaimpo))
writeLines(full_output, output_text_file)

# Extract relevant data from calc_relaimpo for plotting
results <- data.frame(Predictor = names(calc_relaimpo$lmg), Importance = calc_relaimpo$lmg)

# Save the plotting data to a CSV file
write.csv(results, file = output_csv_file, row.names = FALSE)
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ pandas
2
+ matplotlib
3
+ seaborn
4
+ gradio
5
+ Pillow
6
+ openpyxl