# Spaces: Sleeping  (page-header residue from the hosting site; not part of the program)
# Streamlit app: upload a PDF, show its extracted text, and score that text
# against a fixed list of book genres with a sequence-classification model.
#
# Extra dependency for PDF parsing: pip install PyPDF2
import streamlit as st
import torch
from PyPDF2 import PdfReader
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# TODO: replace with the real Hugging Face model id or local directory.
# The original script left the model-loading lines commented out, so the
# "Classify" button crashed with NameError on `tokenizer` / `model`.
MODEL_PATH = "your_huggingface_model_path"

# Genre names, in the order of the model's output classes.
# NOTE(review): assumes the model was trained with exactly these labels in
# this order -- confirm against the model's config.
genre_labels = [
    "mystery", "sci-fi", "fantasy", "romance", "thriller", "horror", "drama",
    "comedy", "historical fiction", "adventure", "action", "young adult",
    "classic", "biography", "non-fiction", "self-help",
    "children's literature", "poetry", "crime", "dystopian",
]


@st.cache_resource(show_spinner=False)
def _load_classifier(model_path):
    """Load the tokenizer and model from *model_path* once per server process.

    Streamlit re-executes the script on every interaction; caching keeps the
    weights from being reloaded on each rerun.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model = AutoModelForSequenceClassification.from_pretrained(model_path)
    return tokenizer, model


def _extract_pdf_text(pdf_file):
    """Return the concatenated text of every page in *pdf_file*."""
    reader = PdfReader(pdf_file)
    # `or ""` guards against a page that yields no text (e.g. a scanned image),
    # and join avoids repeated string concatenation in a loop.
    return "".join(page.extract_text() or "" for page in reader.pages)


st.title("Book Genre Classifier")
st.subheader("PDF Text Extractor")

# Upload PDF
uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")

# Always defined, so the Classify handler cannot hit a NameError when the
# button is pressed before any file has been uploaded.
all_text = ""
if uploaded_file:
    all_text = _extract_pdf_text(uploaded_file)
    # Display extracted text
    st.subheader("Extracted Text")
    st.text_area("PDF Content", all_text, height=300)

if st.button("Classify"):
    if not all_text.strip():
        st.warning("Upload a PDF with extractable text before classifying.")
    else:
        scores = None
        with st.spinner("Classifying..."):
            try:
                tokenizer, model = _load_classifier(MODEL_PATH)
            except OSError as err:
                # from_pretrained reports a missing or unreachable model as OSError.
                st.error(f"Could not load model '{MODEL_PATH}': {err}")
            else:
                # truncation=True cuts the text to the model's maximum input
                # length, so only the beginning of a long book is scored.
                inputs = tokenizer(
                    all_text, return_tensors="pt", truncation=True, padding=True
                )
                # Inference only: skip autograd bookkeeping.
                with torch.no_grad():
                    outputs = model(**inputs)
                    scores = torch.softmax(outputs.logits, dim=1).numpy()

        if scores is not None:
            # Display results
            st.subheader("Predicted Genres:")
            if scores.shape[1] != len(genre_labels):
                st.warning(
                    f"Model outputs {scores.shape[1]} classes but "
                    f"{len(genre_labels)} genre labels are defined; "
                    "showing only the overlapping entries."
                )
            # zip stops at the shorter sequence, so a label/class count
            # mismatch cannot raise IndexError.
            for label, score in zip(genre_labels, scores[0]):
                st.write(f"{label}: {score:.2f}")