"""Streamlit app: upload a research-paper PDF, summarize it with LLaMA-3 on
Groq, analyze why it matters, and chat about its contents.

Pipeline: PDF text -> chunk + embed (MiniLM) -> FAISS index -> RetrievalQA /
ConversationalRetrievalChain over ChatGroq. Extras: gTTS audio of the summary,
a WordCloud image, and a python-pptx slide deck.

NOTE(review): the original source was flattened onto a handful of mega-lines
and truncated inside main() (tab 2/tab 3 bodies). It has been reformatted;
the reconstructed tail is marked below — confirm against the deployed app.
"""

import os
import time

import matplotlib.pyplot as plt
import streamlit as st
from dotenv import load_dotenv
from gtts import gTTS
from langchain.chains import ConversationalRetrievalChain, RetrievalQA
from langchain.memory import ConversationBufferMemory
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_groq import ChatGroq
from pptx import Presentation
from pptx.util import Inches
from PyPDF2 import PdfReader
from wordcloud import WordCloud


def load_groq_api_key():
    """Return the Groq API key from the environment.

    Returns:
        str: the non-empty GROQ_API_KEY value.

    Raises:
        ValueError: if GROQ_API_KEY is unset or empty.
    """
    # Fix: load_dotenv was imported but never called, so a key stored in a
    # local .env file was never picked up.
    load_dotenv()
    groq_api_key = os.getenv("GROQ_API_KEY")
    if not groq_api_key:
        raise ValueError("Error: GROQ_API_KEY not found in environment variables.")
    return groq_api_key


# 🔹 Process Text (Split & Embed)
def process_text(text):
    """Split raw text into overlapping chunks and embed them into a FAISS index.

    Args:
        text: full document text.

    Returns:
        FAISS vector store over the chunks (the app's "knowledge base").
    """
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=3000,
        chunk_overlap=500,
        length_function=len,
    )
    chunks = text_splitter.split_text(text)
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    knowledgeBase = FAISS.from_texts(chunks, embeddings)
    return knowledgeBase


# 🔹 Generate Structured Summary
def generate_summary(knowledgeBase):
    """Ask the LLM for a structured, bullet-point summary of the indexed paper."""
    query = (
        "Summarize the research paper in a structured format, covering objective, "
        "proposed model, methods, evaluation, comparison, and key results. "
        "Keep it concise and clear, using bullet points."
    )
    retriever = knowledgeBase.as_retriever()
    llm = ChatGroq(
        model_name="llama3-8b-8192",
        groq_api_key=os.getenv("GROQ_API_KEY"),
        temperature=0.1,
    )
    chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
    response = chain.invoke({"query": query})
    return response["result"]


# 🔹 Generate Importance Analysis
def generate_importance_analysis(knowledgeBase):
    """Ask the (larger 70B) LLM why the paper matters and what readers should learn."""
    query = (
        "Analyze why this research paper is important for the world and what readers should learn from it. "
        "Focus on:\n"
        "1. The global significance of this research\n"
        "2. Potential real-world applications\n"
        "3. Key takeaways for readers\n"
        "4. How it advances the field\n"
        "Present in clear, concise bullet points with emojis for better readability."
    )
    retriever = knowledgeBase.as_retriever()
    # 70B model here (vs 8B for the summary): this analysis benefits from the
    # stronger model; temperature is slightly higher for more expressive output.
    llm = ChatGroq(
        model_name="llama3-70b-8192",
        groq_api_key=os.getenv("GROQ_API_KEY"),
        temperature=0.2,
    )
    chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
    response = chain.invoke({"query": query})
    return response["result"]


# 🔹 Initialize Document Chatbot
def init_document_chatbot(knowledgeBase):
    """Build a conversational retrieval chain with in-memory chat history.

    Returns:
        ConversationalRetrievalChain wired to the FAISS retriever.
    """
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
    llm = ChatGroq(
        model_name="llama3-8b-8192",
        groq_api_key=os.getenv("GROQ_API_KEY"),
        temperature=0.2,
    )
    return ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=knowledgeBase.as_retriever(),
        memory=memory,
        chain_type="stuff",
    )


def text_to_speech(text):
    """Convert *text* to an MP3 via gTTS.

    Returns:
        str | None: path to the saved MP3, or None on any failure
        (network error, unwritable path, ...). Callers must handle None.
    """
    try:
        tts = gTTS(text=text, lang='en')
        audio_path = "/tmp/summary_audio.mp3"  # Use /tmp/ for Hugging Face Spaces
        tts.save(audio_path)
        if os.path.exists(audio_path):
            return audio_path
        raise Exception("Audio file not created")
    except Exception as e:
        # Best-effort feature: log and degrade gracefully rather than crash the app.
        print(f"Error in gTTS: {e}")
        return None


# 🔹 Generate WordCloud
def generate_wordcloud(text):
    """Render a word cloud of *text* to wordcloud.png and display it in Streamlit."""
    wordcloud = WordCloud(width=800, height=400, background_color="white").generate(text)
    plt.figure(figsize=(10, 5))
    plt.imshow(wordcloud, interpolation="bilinear")
    plt.axis("off")
    plt.savefig("wordcloud.png", bbox_inches="tight")
    # Fix: close the figure — Streamlit reruns this on every interaction and
    # un-closed matplotlib figures accumulate in memory.
    plt.close()
    st.image("wordcloud.png", caption="🔠 WordCloud of Important Keywords", use_container_width=True)


# 🔹 Convert Summary to PowerPoint
def generate_ppt(summary):
    """Render the summary into a .pptx deck.

    One title slide, then one slide per blank-line-separated section of the
    summary (first line of the section = slide title, rest = bullet body).

    Returns:
        str: filename of the saved presentation.
    """
    prs = Presentation()
    slide_layout = prs.slide_layouts[1]  # Title and Content Layout

    # Title slide
    title_slide = prs.slides.add_slide(prs.slide_layouts[0])
    title_slide.shapes.title.text = "Research Paper Summary"

    # Content slides — one per summary section
    for section in summary.split("\n\n"):
        # Fix: skip empty sections (e.g. consecutive blank lines) instead of
        # emitting blank slides.
        if not section.strip():
            continue
        lines = section.split("\n")
        slide = prs.slides.add_slide(slide_layout)
        slide.shapes.title.text = lines[0]                    # first line as title
        slide.shapes.placeholders[1].text = "\n".join(lines[1:])  # rest as bullets

    ppt_filename = "summary_presentation.pptx"
    prs.save(ppt_filename)
    return ppt_filename


# 🔹 Display PDF Info for Engagement
def display_pdf_info(text, pdf_reader):
    """Show quick stats about the uploaded PDF (pages, words, opening lines)."""
    total_pages = len(pdf_reader.pages)
    words = text.split()  # split once; original recomputed it per stat
    word_count = len(words)
    first_few_lines = " ".join(words[:50]) + "..."

    st.subheader("📄 PDF Insights")
    st.write(f"📝 **Total Pages:** {total_pages}")
    st.write(f"🔢 **Word Count:** {word_count}")
    st.write(f"📌 **First Few Lines:** {first_few_lines}")

    with st.expander("🔍 **View More Insights**"):
        st.write("💡 **Pro Tip:** LLaMA-3 can summarize large documents in seconds! 🚀")
        st.info(
            "📖 Research papers are typically structured into sections like Abstract, Introduction, Methods, and Results. AI captures these key elements!")


# 🔹 Document Chatbot Interface
def document_chatbot_interface(conversation_chain):
    """Streamlit chat UI that answers questions about the uploaded document only."""
    st.subheader("💬 Document Chatbot")
    st.warning(
        "This chatbot only answers questions about the uploaded document. It won't respond to general questions.")

    # Seed the chat history with a one-time welcome message
    if "messages" not in st.session_state:
        st.session_state.messages = []
        st.session_state.messages.append({
            "role": "assistant",
            "content": "Ask me anything about the research paper you uploaded! For example:\n\n"
                       "• What is the main objective of this research?\n"
                       "• Can you explain the methodology used?\n"
                       "• What were the key findings?\n"
                       "• How does this compare to previous work?"
        })

    # Replay chat history
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    # Accept user input
    if prompt := st.chat_input("Ask about the research paper..."):
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)

        with st.chat_message("assistant"):
            with st.spinner("Thinking..."):
                try:
                    # Fix: .invoke replaces the deprecated chain({...}) call form.
                    response = conversation_chain.invoke({"question": prompt})
                    answer = response["answer"]
                    # Steer off-document answers back to the paper's content
                    if "I don't know" in answer or "not mentioned" in answer.lower():
                        answer = ("This information is not covered in the document. "
                                  "Please ask questions specifically about the research paper content.")
                    st.markdown(answer)
                    st.session_state.messages.append({"role": "assistant", "content": answer})
                except Exception:
                    st.error("Sorry, I encountered an error processing your question. Please try again.")
                    st.session_state.messages.append({"role": "assistant", "content": "Error processing request"})


# 🔹 Main Streamlit App
def main():
    """App entry point: upload a PDF, then show summary / importance / chat tabs."""
    st.title("📄 Advanced Research Paper Analyzer")
    st.write("🚀 Powered by LLaMA-3 on Groq - Understand why research matters and what you should learn")
    st.divider()

    try:
        os.environ["GROQ_API_KEY"] = load_groq_api_key()
    except ValueError as e:
        st.error(str(e))
        return

    pdf = st.file_uploader("📤 Upload your Research Paper (PDF)", type="pdf")
    if pdf is None:
        return

    with st.spinner("🔄 Extracting text & analyzing PDF... Please wait!"):
        pdf_reader = PdfReader(pdf)
        # Walrus avoids calling extract_text() twice per page as the original did.
        text = "".join(t for page in pdf_reader.pages if (t := page.extract_text()))
        knowledgeBase = process_text(text)
        display_pdf_info(text, pdf_reader)
        st.success("✅ PDF processed successfully! Now generating insights...")

    # Tabs for the different analysis sections
    tab1, tab2, tab3 = st.tabs(["📜 Summary", "🌍 Why This Matters", "💬 Chat with Paper"])

    with tab1:
        with st.spinner("🧠 Generating comprehensive summary..."):
            response = generate_summary(knowledgeBase)
            st.subheader("📜 Structured Summary:")
            st.markdown(response, unsafe_allow_html=True)

            # Audio conversion — fix: text_to_speech returns None on failure,
            # and st.audio(None) would crash the page.
            audio_file = text_to_speech(response)
            if audio_file:
                st.audio(audio_file, format="audio/mp3")

            # WordCloud generation
            generate_wordcloud(response)

            # PowerPoint conversion
            ppt_file = generate_ppt(response)
            with open(ppt_file, "rb") as file:
                st.download_button(label="📥 Download Summary PPT", data=file,
                                   file_name="Research_Summary.pptx")

    with tab2:
        with st.spinner("🔍 Analyzing global significance and key learnings..."):
            importance = generate_importance_analysis(knowledgeBase)
            st.subheader("🌍 Why This Research Matters")
            st.markdown(importance, unsafe_allow_html=True)
            st.caption("This analysis explains why the paper you uploaded is important and what you should learn from it.")

    with tab3:
        # NOTE(review): tab3's body was truncated in the original source; this
        # is a minimal reconstruction from init_document_chatbot /
        # document_chatbot_interface — confirm against the original app.
        # The chain is kept in session_state so chat memory survives reruns.
        if "conversation_chain" not in st.session_state:
            st.session_state.conversation_chain = init_document_chatbot(knowledgeBase)
        document_chatbot_interface(st.session_state.conversation_chain)


if __name__ == "__main__":
    main()