Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -184,10 +184,57 @@ if uploaded_file is not None:
|
|
| 184 |
|
| 185 |
# Stack the per-rater objects row-wise, then drop entries with missing values.
df5 = pd.concat(
    [boss, direct, colleague, other_colleague],
    axis=0,
).dropna()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 187 |
|
| 188 |
# Render the results on the page (`st` is presumably Streamlit; its import is
# not visible in this part of the file).
st.write("## Output:")

# Heading text fixed: "Compentency" -> "Competency".
st.write("### 1. Extracted dataset: Dimensions, Competency Cluster, Raters and Scores by Raters")
st.dataframe(df_combined)

st.write("### 2. Extracted list of Strengths and Weaknesses rated by each Rater")
st.write(df5)
|
|
|
|
|
|
|
|
|
|
|
|
| 184 |
|
| 185 |
# Stack the per-rater objects row-wise, then drop entries with missing values.
df5 = pd.concat(
    [boss, direct, colleague, other_colleague],
    axis=0,
).dropna()
|
| 187 |
+
|
| 188 |
+
# Titles of the open-comment sections searched for in the report text.
sections = [
    "Continue doing the following",
    "Start doing the following",
    'Reasons why I think that your behavior has worsened concerning the dimensions marked in the "Perception & Change Section" of the questionnaire',
    "Further tips for your work in our organisation",
]

# One regex per rater group. Each captures the text between a short marker
# line (VG / Ke / KU) and the rater label in parentheses that follows it.
patterns = dict(
    Boss=r"VG\n(.*?)(?=\(Boss\))",
    Colleagues=r"Ke\n(.*?)(?=\(Colleagues\))",
    Customers=r"KU\n(.*?)(?=\(Internal/external customers\))",
)
|
| 200 |
+
|
| 201 |
+
# Function to extract comments for each section
|
| 202 |
+
def extract_comments(data, section, rater_patterns=None):
    """Extract the per-rater comments found under one open-comment section.

    The section is located by the heading ``Kom <section>:`` and runs up to
    the next ``IX. Open Comments`` marker or, failing that, the end of
    ``data``. Every rater pattern is then applied to that section text.

    Args:
        data: Full report text to search. ``None`` or an empty string yields
            no comments instead of raising.
        section: Section title; it is matched literally (regex-escaped).
        rater_patterns: Optional mapping of rater label to a regex whose
            single capture group holds the comment text. When omitted, the
            module-level ``patterns`` mapping is used.

    Returns:
        A list of dicts with the keys ``"Section"``, ``"Rater"`` and
        ``"Comment"`` (surrounding whitespace stripped), ordered by rater in
        mapping order. The list is empty when the section is not found.
    """
    if not data:
        return []
    if rater_patterns is None:
        rater_patterns = patterns

    section_pattern = rf"Kom\s+{re.escape(section)}:\n(.*?)(?=(?:IX\. Open Comments|$))"
    section_match = re.search(section_pattern, data, re.DOTALL)
    if section_match is None:
        return []

    section_text = section_match.group(1)
    comments = []
    for rater, pattern in rater_patterns.items():
        # DOTALL lets a single comment span several lines.
        comments.extend(
            {"Section": section, "Rater": rater, "Comment": found.strip()}
            for found in re.findall(pattern, section_text, re.DOTALL)
        )
    return comments
|
| 222 |
+
|
| 223 |
+
# Gather the comments of every section into a single table. The columns are
# named explicitly so the headers are still present when nothing was extracted.
all_comments = []
for section in sections:
    all_comments.extend(extract_comments(pdf_text, section))

df6 = pd.DataFrame(all_comments, columns=["Section", "Rater", "Comment"])
|
| 229 |
+
|
| 230 |
|
| 231 |
# Render the results on the page (`st` is presumably Streamlit; its import is
# not visible in this part of the file).
st.write("## Output:")

# Heading text fixed: "Compentency" -> "Competency".
st.write("### 1. Extracted dataset: Dimensions, Competency Cluster, Raters and Scores by Raters")
st.dataframe(df_combined)

st.write("### 2. Extracted list of Strengths and Weaknesses rated by each Rater")
st.write(df5)

st.write("### 3. Extracted list of Open Comments by each Rater")
st.write(df6)
|