Spaces:

orYx-models
/

lds_pdf_parser

Running

App Files Files Community

Vineedhar committed on Jan 3, 2025

Commit

88164ab

verified ·

1 Parent(s): bbc90c5

Update app.py

Browse files

Files changed (1) hide show

app.py +47 -0

app.py CHANGED Viewed

@@ -184,10 +184,57 @@ if uploaded_file is not None:
     df5 = pd.concat([boss, direct, colleague, other_colleague], axis = 0)
     df5 = df5.dropna()
     st.write("## Output:")
     st.write("### 1. Extracted dataset: Dimensions, Compentency Cluster, Raters and Scores by Raters")
     st.dataframe(df_combined)
     st.write("### 2. Extracted list of Strengths and Weaknesses rated by each Rater")
     st.write(df5)

     df5 = pd.concat([boss, direct, colleague, other_colleague], axis = 0)
     df5 = df5.dropna()
+    sections = [
+    "Continue doing the following",
+    "Start doing the following",
+    "Reasons why I think that your behavior has worsened concerning the dimensions marked in the \"Perception & Change Section\" of the questionnaire",
+    "Further tips for your work in our organisation"
+    ]
+    patterns = {
+        "Boss": r"VG\n(.*?)(?=\(Boss\))",
+        "Colleagues": r"Ke\n(.*?)(?=\(Colleagues\))",
+        "Customers": r"KU\n(.*?)(?=\(Internal/external customers\))"
+    }
+    # Function to extract comments for each section
+    def extract_comments(data, section):  # collect one dict per rater comment found under the `section` heading in `data`
+        section_pattern = rf"Kom\s+{re.escape(section)}:\n(.*?)(?=(?:IX\. Open Comments|$))"  # lazily capture from the heading up to the next "IX. Open Comments" marker, or end of text
+        section_data = re.search(section_pattern, data, re.DOTALL)  # DOTALL lets the capture span multiple lines; only the first occurrence of the heading is used
+        if not section_data:
+            return []  # heading not present in the text: no comments for this section
+        section_text = section_data.group(1)  # NOTE(review): may run into later sections if "IX. Open Comments" does not recur between them — confirm against the PDF layout
+        comments = []
+        for rater, pattern in patterns.items():  # `patterns` comes from the enclosing scope (rater label -> regex)
+            matches = re.findall(pattern, section_text, re.DOTALL)  # every captured comment body for this rater
+            for match in matches:
+                comments.append({
+                    "Section": section,
+                    "Rater": rater,
+                    "Comment": match.strip()  # drop surrounding whitespace/newlines from the captured text
+                })
+        return comments  # list of {"Section", "Rater", "Comment"} dicts, appended rater by rater in `patterns` order
+    # Create dataframes for each section
+    all_comments = []
+    for section in sections:
+        all_comments.extend(extract_comments(pdf_text, section))
+    df6 = pd.DataFrame(all_comments)
     st.write("## Output:")
     st.write("### 1. Extracted dataset: Dimensions, Compentency Cluster, Raters and Scores by Raters")
     st.dataframe(df_combined)
     st.write("### 2. Extracted list of Strengths and Weaknesses rated by each Rater")
     st.write(df5)
+    st.write("### 3. Extracted list of Open Comments by each Rater")
+    st.write(df6)