Spaces:

Fluospark128
/

Genre_Prediction_App

Sleeping

App Files Files Community

Fluospark128 committed on Dec 27, 2024

Commit

d70b0c6

verified ·

1 Parent(s): 8ef5e77

Update app.py

Browse files

Files changed (1) hide show

app.py +24 -24

app.py CHANGED Viewed

@@ -16,32 +16,32 @@ st.write("Upload a PDF file, and this app will classify its genres using zero-sh
 # Function to extract text from PDF
 def extract_text_from_pdf(pdf_file):
     reader = PdfReader(pdf_file)
-    text = ""
-    for page in reader.pages:
         text += page.extract_text()
     return text
     # File uploader
-    pdf_file = st.file_uploader("Upload PDF", type=["pdf"])
-    if pdf_file is not None:
-        st.write("Processing the PDF...")
-        text = extract_text_from_pdf(pdf_file)
-        if text.strip():
-            st.write("PDF Text Extracted. Performing Genre Classification...")
-            classifier = pipeline("zero-shot-classification", model = "facebook/bart-large-mnli") #load_classifier()
-            # Define candidate genres
-            candidate_labels =["Romance", "Mystery", "Thriller", "Science Fiction", "Fantasy", "Horror", "Historical Fiction", "Crime", "Western", "Dystopian", "Biography", "Autobiography", "Memoir", "History", "Self-Help", "Travel", "Essay", "Journalism", "Sonnet", "Haiku", "Free Verse", "Narrative Poetry", "Lyric Poetry", "Tragedy", "Comedy", "Melodrama", "Farce", "Graphic Novel", "Epistolary", "Magical Realism", "Satire", "Young Adult Fiction"]
-            # Perform zero-shot classification
-            result = classifier(text),#[:1000], candidate_labels, multi_label=True)  # Using the first 1000 characters
-            genres = sorted(zip(result["labels"], result["scores"]), key=lambda x: x[1], reverse=True)
-            st.subheader("Top 20 Detected Genres:")
-            top_genres = genres[:20]  # Get the top 20 genres
-            for genre, score in top_genres:
-                st.write(f"**{genre.capitalize()}**: {score:.2f}")
-        else:
-            st.error("No text could be extracted from the PDF. Please try another file.")

 # Function to extract text from PDF
 def extract_text_from_pdf(pdf_file):
+    """Extract the text of every page of a PDF.
+
+    Args:
+        pdf_file: Whatever ``PdfReader`` accepts; the app passes the
+            object returned by ``st.file_uploader``.
+
+    Returns:
+        The text of all pages concatenated in page order, with no
+        separator between pages.
+    """
     reader = PdfReader(pdf_file)
+    # A single join avoids repeated string re-allocation, and ``or ""``
+    # keeps a page that yields no text from aborting the whole extraction.
+    return "".join(page.extract_text() or "" for page in reader.pages)
     # File uploader
+# Streamlit entry flow: upload a PDF, extract its text, then rank the
+# candidate genres with a zero-shot classifier and display the scores.
+pdf_file = st.file_uploader("Upload PDF", type=["pdf"])
+if pdf_file is not None:
+    st.write("Processing the PDF...")
+    text = extract_text_from_pdf(pdf_file)
+    if text.strip():
+        st.write("PDF Text Extracted. Performing Genre Classification...")
+        # NOTE(review): the pipeline is constructed inline on every pass through
+        # this branch; the trailing "load_classifier()" hint suggests a cached
+        # loader was intended - confirm and restore if so.
+        classifier = pipeline("zero-shot-classification", model = "facebook/bart-large-mnli") #load_classifier()
+        # Define candidate genres
+        candidate_labels =["Romance", "Mystery", "Thriller", "Science Fiction", "Fantasy", "Horror", "Historical Fiction", "Crime", "Western", "Dystopian", "Biography", "Autobiography", "Memoir", "History", "Self-Help", "Travel", "Essay", "Journalism", "Sonnet", "Haiku", "Free Verse", "Narrative Poetry", "Lyric Poetry", "Tragedy", "Comedy", "Melodrama", "Farce", "Graphic Novel", "Epistolary", "Magical Realism", "Satire", "Young Adult Fiction"]
+        # Perform zero-shot classification on the first 1000 characters only.
+        # The labels and multi_label flag must be passed in the call itself:
+        # previously they were commented out and a stray trailing comma turned
+        # `result` into a tuple, so the lookups below could never succeed.
+        result = classifier(text[:1000], candidate_labels, multi_label=True)
+        # Pair each label with its score and order from most to least likely.
+        genres = sorted(zip(result["labels"], result["scores"]), key=lambda x: x[1], reverse=True)
+        st.subheader("Top 20 Detected Genres:")
+        top_genres = genres[:20]  # Get the top 20 genres
+        for genre, score in top_genres:
+            st.write(f"**{genre.capitalize()}**: {score:.2f}")
+    else:
+        st.error("No text could be extracted from the PDF. Please try another file.")