Fluospark128 committed on
Commit
d70b0c6
·
verified ·
1 Parent(s): 8ef5e77

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -24
app.py CHANGED
@@ -16,32 +16,32 @@ st.write("Upload a PDF file, and this app will classify its genres using zero-sh
16
  # Function to extract text from PDF
17
  def extract_text_from_pdf(pdf_file):
18
  reader = PdfReader(pdf_file)
19
-     text = ""
20
-     for page in reader.pages:
21
          text += page.extract_text()
22
      return text
23
 
24
      # File uploader
25
-     pdf_file = st.file_uploader("Upload PDF", type=["pdf"])
26
-     if pdf_file is not None:
27
-         st.write("Processing the PDF...")
28
-         text = extract_text_from_pdf(pdf_file)
29
-
30
-         if text.strip():
31
-             st.write("PDF Text Extracted. Performing Genre Classification...")
32
-             classifier = pipeline("zero-shot-classification", model = "facebook/bart-large-mnli") #load_classifier()
33
-
34
-             # Define candidate genres
35
-             candidate_labels =["Romance", "Mystery", "Thriller", "Science Fiction", "Fantasy", "Horror", "Historical Fiction", "Crime", "Western", "Dystopian", "Biography", "Autobiography", "Memoir", "History", "Self-Help", "Travel", "Essay", "Journalism", "Sonnet", "Haiku", "Free Verse", "Narrative Poetry", "Lyric Poetry", "Tragedy", "Comedy", "Melodrama", "Farce", "Graphic Novel", "Epistolary", "Magical Realism", "Satire", "Young Adult Fiction"]
36
-
37
-             # Perform zero-shot classification
38
-             result = classifier(text),#[:1000], candidate_labels, multi_label=True)  # Using the first 1000 characters
39
-             genres = sorted(zip(result["labels"], result["scores"]), key=lambda x: x[1], reverse=True)
40
-
41
-             st.subheader("Top 20 Detected Genres:")
42
-             top_genres = genres[:20]  # Get the top 20 genres
43
-             for genre, score in top_genres:
44
-                 st.write(f"**{genre.capitalize()}**: {score:.2f}")
45
-         else:
46
-             st.error("No text could be extracted from the PDF. Please try another file.")
47
 
 
16
  # Function to extract text from PDF
17
  def extract_text_from_pdf(pdf_file):
18
  reader = PdfReader(pdf_file)
19
+ text = ""
20
+ for page in reader.pages:
21
          text += page.extract_text()
22
      return text
23
 
24
      # File uploader
25
+ pdf_file = st.file_uploader("Upload PDF", type=["pdf"])
26
+ if pdf_file is not None:
27
+ st.write("Processing the PDF...")
28
+     text = extract_text_from_pdf(pdf_file)
29
+
30
+     if text.strip():
31
+ st.write("PDF Text Extracted. Performing Genre Classification...")
32
+         classifier = pipeline("zero-shot-classification", model = "facebook/bart-large-mnli") #load_classifier()
33
+
34
+         # Define candidate genres
35
+         candidate_labels =["Romance", "Mystery", "Thriller", "Science Fiction", "Fantasy", "Horror", "Historical Fiction", "Crime", "Western", "Dystopian", "Biography", "Autobiography", "Memoir", "History", "Self-Help", "Travel", "Essay", "Journalism", "Sonnet", "Haiku", "Free Verse", "Narrative Poetry", "Lyric Poetry", "Tragedy", "Comedy", "Melodrama", "Farce", "Graphic Novel", "Epistolary", "Magical Realism", "Satire", "Young Adult Fiction"]
36
+
37
+         # Perform zero-shot classification
38
+         result = classifier(text),#[:1000], candidate_labels, multi_label=True)  # Using the first 1000 characters
39
+         genres = sorted(zip(result["labels"], result["scores"]), key=lambda x: x[1], reverse=True)
40
+
41
+         st.subheader("Top 20 Detected Genres:")
42
+         top_genres = genres[:20]  # Get the top 20 genres
43
+         for genre, score in top_genres:
44
+             st.write(f"**{genre.capitalize()}**: {score:.2f}")
45
+     else:
46
+         st.error("No text could be extracted from the PDF. Please try another file.")
47