"""Streamlit app that summarizes an uploaded research paper (PDF) with BART."""

import os

import PyPDF2
import streamlit as st
# import langchain
from transformers import BartForConditionalGeneration, BartTokenizer

# Loaded at import time and used as module-level globals by
# generate_summary() and summarize_text() below.
tokenizer = BartTokenizer.from_pretrained("facebook/bart-large-cnn")
model = BartForConditionalGeneration.from_pretrained("facebook/bart-large-cnn")


def save_uploaded_file(uploaded_file):
    """Write an uploaded file into ``temp_files/`` and return the saved path.

    Args:
        uploaded_file: Object exposing ``name`` and ``getbuffer()``; ``main``
            passes the value returned by ``st.file_uploader``.

    Returns:
        The path of the file written inside ``temp_files``.
    """
    temp_dir = "temp_files"
    os.makedirs(temp_dir, exist_ok=True)
    # The name comes from the client; keep only its final path component so
    # something like "../../x.pdf" cannot be written outside temp_dir.
    safe_name = os.path.basename(uploaded_file.name)
    file_path = os.path.join(temp_dir, safe_name)
    with open(file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())
    return file_path


# Function to extract text from PDF
def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page of an uploaded PDF.

    The upload is first persisted with ``save_uploaded_file`` and then read
    back from disk with ``PyPDF2.PdfReader``.

    Args:
        pdf_file: Uploaded file object accepted by ``save_uploaded_file``.

    Returns:
        A single string with the page texts joined in page order (no
        separator is inserted between pages).
    """
    saved_path = save_uploaded_file(pdf_file)
    with open(saved_path, "rb") as file:
        pdf_reader = PyPDF2.PdfReader(file)
        # `or ""` is defensive: a page that yields no text must not break the
        # join. Extraction has to finish while the file is still open.
        return "".join(page.extract_text() or "" for page in pdf_reader.pages)


def generate_summary(text: str):
    """Generate summary token ids for ``text`` with the module-level model.

    The input is truncated to 1024 tokens, so only the beginning of a long
    document contributes to the summary.

    Args:
        text: Raw text to summarize.

    Returns:
        The value returned by ``model.generate`` (beam search with 4 beams,
        at most 200 tokens); ``summarize_text`` decodes its first element.
    """
    # Tokenize the text
    tokens = tokenizer(text, return_tensors="pt", max_length=1024, truncation=True)
    summary_ids = model.generate(
        tokens.input_ids,
        num_beams=4,
        max_length=200,
        early_stopping=True,
    )
    return summary_ids


# Function to summarize text
def summarize_text(text: str) -> str:
    """Return a plain-text summary of ``text``.

    Args:
        text: Raw text to summarize.

    Returns:
        The decoded first generated sequence, with special tokens removed.
    """
    summary_ids = generate_summary(text)
    # The keyword was previously misspelled ("clean_ip_..."), so the intended
    # setting never reached the decoder.
    summary = tokenizer.decode(
        summary_ids[0],
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )
    return summary


# Function to extract key information from the paper
def extract_paper_info(text):
    """Placeholder for key-information extraction; currently returns ``None``."""
    # Logic to extract key information from the paper (e.g., using regex, NLP techniques)
    # This part can be expanded based on the specific requirements
    pass


# Function to build and fine-tune the chatbot
def build_chatbot():
    """Placeholder for the chatbot builder; currently returns an empty string."""
    # Fine-tuning language model for chatbot using Langchain
    lang_model = ''
    # Additional fine-tuning steps can be added here
    return lang_model


# Main function to run the Streamlit app
def main():
    """Render the page: upload a PDF, extract its text and show a summary."""
    st.title("Research Paper Understanding Chatbot")
    st.write("As of now supports only summarization.")

    # Upload PDF file
    uploaded_file = st.file_uploader("Upload a research paper (PDF)", type="pdf")

    if uploaded_file is not None:
        st.write("Paper uploaded successfully!")

        # Extract text from PDF
        text = extract_text_from_pdf(uploaded_file)

        # With no extractable text the model would be asked to summarize an
        # empty string; tell the user instead.
        if not text.strip():
            st.warning("No extractable text was found in this PDF.")
            return

        # Display summary of the paper
        st.subheader("Summary of the Paper")
        with st.spinner("Brewing a potion for your paper's essence..."):
            summary = summarize_text(text)
        st.write(summary)

        # # Extract key information from the paper
        # st.subheader("Key Information")
        # paper_info = extract_paper_info(text)
        # st.write(paper_info)

        # # Build chatbot
        # st.subheader("Chatbot")
        # chatbot = build_chatbot()

        # # Chat interface
        # user_input = st.text_input("You: ")
        # if user_input:
        #     response = chatbot.generate_response(user_input)
        #     st.write("Chatbot:", response)
    else:
        st.write("Please upload a PDF file")


if __name__ == "__main__":
    main()