Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -6,7 +6,6 @@ import fitz # PyMuPDF
|
|
| 6 |
import docx
|
| 7 |
import matplotlib.pyplot as plt
|
| 8 |
import pandas as pd
|
| 9 |
-
import tempfile
|
| 10 |
|
| 11 |
def extract_text_from_pdf(file):
|
| 12 |
text = ""
|
|
@@ -20,8 +19,7 @@ def extract_text_from_pdf(file):
|
|
| 20 |
|
| 21 |
def extract_text_from_docx(file):
|
| 22 |
doc = docx.Document(file)
|
| 23 |
-
return "
|
| 24 |
-
".join([para.text for para in doc.paragraphs])
|
| 25 |
|
| 26 |
def semantic_match(lo_texts, content):
|
| 27 |
vectorizer = TfidfVectorizer().fit_transform([content] + lo_texts)
|
|
@@ -40,8 +38,7 @@ def compare_handouts(old_pdf, new_pdf, lo_file):
|
|
| 40 |
|
| 41 |
# Extract Learning Outcomes
|
| 42 |
lo_text = extract_text_from_docx(lo_file)
|
| 43 |
-
lo_list = [line.strip() for line in lo_text.split("
|
| 44 |
-
") if line.strip()]
|
| 45 |
if not lo_list:
|
| 46 |
return "No learning outcomes detected.", None, None
|
| 47 |
|
|
|
|
| 6 |
import docx
|
| 7 |
import matplotlib.pyplot as plt
|
| 8 |
import pandas as pd
|
|
|
|
| 9 |
|
| 10 |
def extract_text_from_pdf(file):
|
| 11 |
text = ""
|
|
|
|
| 19 |
|
| 20 |
def extract_text_from_docx(file):
|
| 21 |
doc = docx.Document(file)
|
| 22 |
+
return "\n".join([para.text for para in doc.paragraphs])
|
|
|
|
| 23 |
|
| 24 |
def semantic_match(lo_texts, content):
|
| 25 |
vectorizer = TfidfVectorizer().fit_transform([content] + lo_texts)
|
|
|
|
| 38 |
|
| 39 |
# Extract Learning Outcomes
|
| 40 |
lo_text = extract_text_from_docx(lo_file)
|
| 41 |
+
lo_list = [line.strip() for line in lo_text.split("\n") if line.strip()]
|
|
|
|
| 42 |
if not lo_list:
|
| 43 |
return "No learning outcomes detected.", None, None
|
| 44 |
|