Deevyankar committed on
Commit
843f763
·
verified ·
1 Parent(s): 8a4a27c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -2
app.py CHANGED
@@ -16,17 +16,28 @@ def extract_text_from_pdf(pdf_file):
16
  full_text += page.get_text()
17
  return full_text
18
 
 
19
 
20
- # Extract lines from uploaded LO file (.txt or .docx)
21
  def extract_los(lo_file):
22
  if lo_file.name.endswith('.txt'):
23
  return lo_file.read().decode('utf-8').splitlines()
24
  elif lo_file.name.endswith('.docx'):
25
- doc = Document(lo_file)
 
26
  return [para.text.strip() for para in doc.paragraphs if para.text.strip()]
27
  else:
28
  return []
29
 
 
 
 
 
 
 
 
 
 
 
30
 
31
  # Main function to compare PDFs and assess LO coverage
32
  def compare_and_assess(old_pdf, new_pdf, lo_file):
 
16
  full_text += page.get_text()
17
  return full_text
18
 
19
+ import io
20
 
 
21
  def extract_los(lo_file):
22
  if lo_file.name.endswith('.txt'):
23
  return lo_file.read().decode('utf-8').splitlines()
24
  elif lo_file.name.endswith('.docx'):
25
+ file_bytes = io.BytesIO(lo_file.read())
26
+ doc = Document(file_bytes)
27
  return [para.text.strip() for para in doc.paragraphs if para.text.strip()]
28
  else:
29
  return []
30
 
31
+ """"# Extract lines from uploaded LO file (.txt or .docx)
32
+ def extract_los(lo_file):
33
+ if lo_file.name.endswith('.txt'):
34
+ return lo_file.read().decode('utf-8').splitlines()
35
+ elif lo_file.name.endswith('.docx'):
36
+ doc = Document(lo_file)
37
+ return [para.text.strip() for para in doc.paragraphs if para.text.strip()]
38
+ else:
39
+ return []"""
40
+
41
 
42
  # Main function to compare PDFs and assess LO coverage
43
  def compare_and_assess(old_pdf, new_pdf, lo_file):