Spaces:
Sleeping
Sleeping
import os
import tempfile

import pandas as pd
import streamlit as st
from huggify_data.scrape_modules import PDFQnAGenerator
def _render_instructions():
    """Show the usage instructions in a collapsible section of the sidebar."""
    # NOTE(review): the "π", "π€β¨" and "π§" sequences below look like emoji
    # that were mis-decoded somewhere upstream. They are kept as found here;
    # restore the intended characters from the original file if available.
    with st.sidebar.expander("Instruction"):
        st.write('''
π Introducing Huggify-Data: Your Ultimate PDF Data Scraping and Uploading Tool! π
π I'm thrilled to present the new user-friendly interface for my Python package, huggify-data. This powerful tool simplifies the process of scraping data from PDFs and generating question and answer pairs using OpenAI, making it perfect for building conversational chatbots. π€β¨
π Key Features:
1. Easy PDF Data Extraction: Quickly scrape text content from PDFs and convert it into a structured data frame.
2. Automated Question-Answer Pair Generation: Extract meaningful question-answer pairs from your PDF content, ideal for training chatbots.
3. User-Friendly Interface: Interact with the package without any programming experience, making information accessibility easier and more efficient.
π§ How It Works:
- API Key: Add your OpenAI API Key.
- Load Your PDF: Easily load any PDF file into the library.
- Just wait: Wait and download the `.csv` from the app.
π Why Huggify-Data?
Whether you're a data scientist, developer, or AI enthusiast, Huggify-Data streamlines the process of preparing your PDF data for AI applications. It's never been easier to transform your PDFs into valuable datasets for building conversational AI models.
π Links:
- **GitHub Repository**: [https://lnkd.in/eJEJebcw](https://lnkd.in/eJEJebcw)
- **Documentation**: [https://lnkd.in/eF9JFXAP](https://lnkd.in/eF9JFXAP)
- **Notebook**: [https://lnkd.in/eaA2qaPt](https://lnkd.in/eaA2qaPt)
- **App**: [https://huggingface.co/spaces/eagle0504/huggify-data](https://huggingface.co/spaces/eagle0504/huggify-data)
Don't forget to like, comment, and subscribe for more updates and tutorials on AI and data science! ππ
#HuggifyData #PythonLibrary #AI #DataScience #HuggingFace #PDFScraping #Chatbot #OpenSource #Yiqiao
''')


def _generate_qna_dataframe(pdf_bytes, openai_api_key):
    """Run the question-answer pipeline on the raw bytes of a PDF.

    The bytes are written to a temporary ``.pdf`` file because
    ``PDFQnAGenerator`` is constructed from a file path. The temporary file
    is always removed afterwards, whether or not the pipeline succeeds.

    Args:
        pdf_bytes: Content of the uploaded PDF.
        openai_api_key: API key handed to ``PDFQnAGenerator``.

    Returns:
        The object returned by ``PDFQnAGenerator.convert_to_dataframe()``;
        the caller uses it as a pandas DataFrame.
    """
    # delete=False keeps the file on disk after it is closed, so the
    # generator can open it again by path; cleanup is done explicitly below.
    temp_pdf = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
    try:
        with temp_pdf:
            temp_pdf.write(pdf_bytes)

        generator = PDFQnAGenerator(temp_pdf.name, openai_api_key)
        generator.process_scraped_content()
        generator.generate_questions_answers()
        return generator.convert_to_dataframe()
    finally:
        # Without this, every processed upload would leave a PDF behind in
        # the temp directory.
        os.remove(temp_pdf.name)


def main():
    """Build the Streamlit page: sidebar inputs, Q&A generation, CSV download.

    Nothing is generated until both a PDF has been uploaded and an OpenAI API
    key has been entered.
    """
    st.set_page_config(layout="wide")
    st.title("PDF Question-Answer Generator using Huggify-Data Package")

    # Expander in the sidebar for instruction
    _render_instructions()

    # Sidebar inputs: the PDF file and the OpenAI API key
    st.sidebar.title("Upload PDF")
    uploaded_file = st.sidebar.file_uploader("Choose a PDF file", type="pdf")
    openai_api_key = st.sidebar.text_input("Enter your OpenAI API key", type="password")

    # Embed YouTube video in the sidebar
    st.sidebar.video("https://youtu.be/CfMcw4OTLCQ")

    # Guard clause: wait until both inputs are available.
    if uploaded_file is None or not openai_api_key:
        return

    # NOTE(review): Streamlit re-executes the script on widget interaction,
    # so using the download button presumably re-runs the whole generation
    # (and its OpenAI calls) -- consider caching the result; confirm first.
    with st.spinner('Processing the PDF and generating questions and answers...'):
        # getvalue() returns the full upload regardless of the current
        # stream position, unlike read().
        df = _generate_qna_dataframe(uploaded_file.getvalue(), openai_api_key)

    # Display the resulting DataFrame
    st.subheader("Generated Question-Answer Pairs")
    st.write(df)

    # Option to download the DataFrame as a CSV
    csv = df.to_csv(index=False).encode('utf-8')
    st.download_button(
        label="Download as CSV",
        data=csv,
        file_name='questions_answers.csv',
        mime='text/csv',
    )
# Script entry point: build the app only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()