|
|
import os
import tempfile
import time

import matplotlib.pyplot as plt
import streamlit as st
from dotenv import load_dotenv
from gtts import gTTS
from langchain.chains import RetrievalQA, ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_groq import ChatGroq
from pptx import Presentation
from pptx.util import Inches
from PyPDF2 import PdfReader
from wordcloud import WordCloud
|
|
|
|
|
def load_groq_api_key():
    """Return the Groq API key from the environment.

    BUG FIX: the module imports ``load_dotenv`` but never called it, so a
    key stored only in a local ``.env`` file was invisible to
    ``os.getenv``. We now load ``.env`` before reading the variable.

    Returns:
        str: The ``GROQ_API_KEY`` value.

    Raises:
        ValueError: If ``GROQ_API_KEY`` is not set in the environment
            (nor in ``.env``).
    """
    load_dotenv()  # populate os.environ from a local .env file, if present
    groq_api_key = os.getenv("GROQ_API_KEY")
    if not groq_api_key:
        raise ValueError("Error: GROQ_API_KEY not found in environment variables.")
    return groq_api_key
|
|
|
|
|
|
|
|
|
|
|
def process_text(text):
    """Chunk raw document text and index the chunks in a FAISS store.

    Args:
        text: Full text extracted from the uploaded PDF.

    Returns:
        FAISS vector store over the text chunks, ready for retrieval.
    """
    splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=3000,
        chunk_overlap=500,
        length_function=len,
    )
    pieces = splitter.split_text(text)

    # MiniLM sentence-transformer embeddings back the similarity index.
    embedder = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    return FAISS.from_texts(pieces, embedder)
|
|
|
|
|
|
|
|
|
|
|
def generate_summary(knowledgeBase):
    """Generate a structured bullet-point summary of the indexed paper.

    Args:
        knowledgeBase: FAISS vector store over the paper's text.

    Returns:
        str: The LLM-generated summary.
    """
    prompt = (
        "Summarize the research paper in a structured format, covering objective, proposed model, methods, evaluation, comparison, and key results. Keep it concise and clear, using bullet points."
    )

    # Low temperature keeps the summary factual and repeatable.
    model = ChatGroq(
        model_name="llama3-8b-8192",
        groq_api_key=os.getenv("GROQ_API_KEY"),
        temperature=0.1,
    )
    qa_chain = RetrievalQA.from_chain_type(
        llm=model,
        retriever=knowledgeBase.as_retriever(),
    )

    outcome = qa_chain.invoke({"query": prompt})
    return outcome["result"]
|
|
|
|
|
|
|
|
|
|
|
def generate_importance_analysis(knowledgeBase):
    """Explain the paper's real-world significance and key takeaways.

    Args:
        knowledgeBase: FAISS vector store over the paper's text.

    Returns:
        str: The LLM-generated importance analysis.
    """
    prompt = (
        "Analyze why this research paper is important for the world and what readers should learn from it. "
        "Focus on:\n"
        "1. The global significance of this research\n"
        "2. Potential real-world applications\n"
        "3. Key takeaways for readers\n"
        "4. How it advances the field\n"
        "Present in clear, concise bullet points with emojis for better readability."
    )

    # The larger 70B model is used here; this analysis benefits from more
    # reasoning capacity than the plain summary does.
    model = ChatGroq(
        model_name="llama3-70b-8192",
        groq_api_key=os.getenv("GROQ_API_KEY"),
        temperature=0.2,
    )
    qa_chain = RetrievalQA.from_chain_type(
        llm=model,
        retriever=knowledgeBase.as_retriever(),
    )

    outcome = qa_chain.invoke({"query": prompt})
    return outcome["result"]
|
|
|
|
|
|
|
|
|
|
|
def init_document_chatbot(knowledgeBase):
    """Build a conversational retrieval chain with chat-history memory.

    Args:
        knowledgeBase: FAISS vector store over the paper's text.

    Returns:
        ConversationalRetrievalChain wired to a Groq LLaMA-3 model and a
        buffer memory keyed on "chat_history".
    """
    chat_model = ChatGroq(
        model_name="llama3-8b-8192",
        groq_api_key=os.getenv("GROQ_API_KEY"),
        temperature=0.2,
    )

    # Buffer memory keeps the full turn-by-turn history for follow-ups.
    history = ConversationBufferMemory(
        memory_key="chat_history",
        return_messages=True,
    )

    return ConversationalRetrievalChain.from_llm(
        llm=chat_model,
        retriever=knowledgeBase.as_retriever(),
        memory=history,
        chain_type="stuff",
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def text_to_speech(text):
    """Synthesize *text* to an MP3 via Google TTS and return the path.

    BUG FIX: previously saved to a hard-coded "/tmp/summary_audio.mp3",
    which does not exist on Windows and let concurrent sessions overwrite
    each other's audio. A unique temporary file is used instead.

    Args:
        text: The text to narrate.

    Returns:
        str | None: Path to the saved MP3, or None if synthesis failed.
    """
    try:
        tts = gTTS(text=text, lang='en')
        # Reserve a unique, portable temp path; delete=False so the file
        # survives for st.audio to read after this function returns.
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
            audio_path = tmp.name
        tts.save(audio_path)

        if os.path.exists(audio_path):
            return audio_path
        else:
            raise Exception("Audio file not created")
    except Exception as e:
        # Best-effort: callers treat None as "no audio available".
        print(f"Error in gTTS: {e}")
        return None
|
|
|
|
|
|
|
|
|
|
|
def generate_wordcloud(text):
    """Render a word cloud of *text* and display it in the Streamlit app.

    Args:
        text: Text whose word frequencies drive the cloud.
    """
    wordcloud = WordCloud(width=800, height=400, background_color="white").generate(text)
    fig = plt.figure(figsize=(10, 5))
    plt.imshow(wordcloud, interpolation="bilinear")
    plt.axis("off")
    plt.savefig("wordcloud.png", bbox_inches="tight")
    # BUG FIX: the figure was never closed; matplotlib accumulates open
    # figures across Streamlit reruns, leaking memory.
    plt.close(fig)
    st.image("wordcloud.png", caption="π WordCloud of Important Keywords", use_container_width=True)
|
|
|
|
|
|
|
|
|
|
|
def generate_ppt(summary):
    """Build a PowerPoint deck from the summary and return its filename.

    The first slide is a title slide; each blank-line-separated section of
    the summary becomes one content slide (first line = slide title,
    remaining lines = body).

    Args:
        summary: Summary text with sections separated by blank lines.

    Returns:
        str: Filename of the saved .pptx file.
    """
    prs = Presentation()
    slide_layout = prs.slide_layouts[1]  # "Title and Content" layout

    title_slide_layout = prs.slide_layouts[0]
    slide = prs.slides.add_slide(title_slide_layout)
    title = slide.shapes.title
    title.text = "Research Paper Summary"

    sections = summary.split("\n\n")
    for section in sections:
        # BUG FIX: `if lines:` was always true (str.split returns at least
        # [""]), so runs of blank lines produced empty slides. Skip
        # whitespace-only sections instead.
        if not section.strip():
            continue

        slide = prs.slides.add_slide(slide_layout)
        title = slide.shapes.title
        content = slide.shapes.placeholders[1]

        lines = section.split("\n")
        title.text = lines[0]
        content.text = "\n".join(lines[1:])

    ppt_filename = "summary_presentation.pptx"
    prs.save(ppt_filename)
    return ppt_filename
|
|
|
|
|
|
|
|
|
|
|
def display_pdf_info(text, pdf_reader):
    """Show quick stats about the uploaded PDF in the Streamlit UI.

    Args:
        text: Full extracted text of the PDF.
        pdf_reader: PdfReader instance for the uploaded file.
    """
    words = text.split()
    page_total = len(pdf_reader.pages)
    preview = " ".join(words[:50]) + "..."

    st.subheader("π PDF Insights")
    st.write(f"π **Total Pages:** {page_total}")
    st.write(f"π’ **Word Count:** {len(words)}")
    st.write(f"π **First Few Lines:** {preview}")

    with st.expander("π **View More Insights**"):
        st.write("π‘ **Pro Tip:** LLaMA-3 can summarize large documents in seconds! π")
        st.info(
            "π Research papers are typically structured into sections like Abstract, Introduction, Methods, and Results. AI captures these key elements!")
|
|
|
|
|
|
|
|
|
|
|
def document_chatbot_interface(conversation_chain):
    """Render the document-grounded chat UI and handle one chat turn.

    Streamlit reruns this function on every interaction; chat history is
    persisted across reruns in ``st.session_state.messages``.

    Args:
        conversation_chain: ConversationalRetrievalChain over the paper.
    """
    st.subheader("π¬ Document Chatbot")
    st.warning(
        "This chatbot only answers questions about the uploaded document. It won't respond to general questions.")

    # Seed the conversation with a greeting exactly once per session.
    if "messages" not in st.session_state:
        st.session_state.messages = []
        st.session_state.messages.append({
            "role": "assistant",
            "content": "Ask me anything about the research paper you uploaded! For example:\n\n"
                       "β’ What is the main objective of this research?\n"
                       "β’ Can you explain the methodology used?\n"
                       "β’ What were the key findings?\n"
                       "β’ How does this compare to previous work?"
        })

    # Replay the stored history so the transcript survives reruns.
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    # Walrus: only enter the branch when the user submitted a prompt.
    if prompt := st.chat_input("Ask about the research paper..."):
        st.session_state.messages.append({"role": "user", "content": prompt})

        with st.chat_message("user"):
            st.markdown(prompt)

        with st.chat_message("assistant"):
            with st.spinner("Thinking..."):
                try:
                    response = conversation_chain({"question": prompt})
                    answer = response["answer"]

                    # Heuristic: if the model admits ignorance, replace the
                    # reply with a standard "out of scope" message.
                    if "I don't know" in answer or "not mentioned" in answer.lower():
                        answer = "This information is not covered in the document. Please ask questions specifically about the research paper content."

                    st.markdown(answer)
                    st.session_state.messages.append({"role": "assistant", "content": answer})
                except Exception as e:
                    # Best-effort UI: report the failure but keep the chat alive.
                    st.error("Sorry, I encountered an error processing your question. Please try again.")
                    st.session_state.messages.append({"role": "assistant", "content": "Error processing request"})
|
|
|
|
|
|
|
|
|
|
|
def main():
    """Streamlit entry point.

    Flow: load the Groq API key, accept a PDF upload, index its text, then
    present three tabs - summary (with audio narration, word cloud, and PPT
    export), importance analysis, and a document-grounded chatbot.
    """
    st.title("π Advanced Research Paper Analyzer")
    st.write("π Powered by LLaMA-3 on Groq - Understand why research matters and what you should learn")
    st.divider()

    # Surface a friendly error instead of crashing when the key is absent.
    try:
        os.environ["GROQ_API_KEY"] = load_groq_api_key()
    except ValueError as e:
        st.error(str(e))
        return

    pdf = st.file_uploader("π€ Upload your Research Paper (PDF)", type="pdf")

    if pdf is not None:
        with st.spinner("π Extracting text & analyzing PDF... Please wait!"):
            pdf_reader = PdfReader(pdf)
            # extract_text() can return None/empty for image-only pages; skip those.
            text = "".join(page.extract_text() for page in pdf_reader.pages if page.extract_text())
            knowledgeBase = process_text(text)

        display_pdf_info(text, pdf_reader)

        # BUG FIX: this string literal was broken across two physical lines
        # (a syntax error as written); rejoined onto one line.
        st.success("β PDF processed successfully! Now generating insights...")

        tab1, tab2, tab3 = st.tabs(["π Summary", "π Why This Matters", "π¬ Chat with Paper"])

        with tab1:
            with st.spinner("π§ Generating comprehensive summary..."):
                response = generate_summary(knowledgeBase)
                st.subheader("π Structured Summary:")
                st.markdown(response, unsafe_allow_html=True)

                # BUG FIX: text_to_speech() returns None on failure, and
                # st.audio(None) raises; only render audio when we have a file.
                audio_file = text_to_speech(response)
                if audio_file:
                    st.audio(audio_file, format="audio/mp3")

                generate_wordcloud(response)

                ppt_file = generate_ppt(response)
                with open(ppt_file, "rb") as file:
                    st.download_button(label="π₯ Download Summary PPT", data=file, file_name="Research_Summary.pptx")

        with tab2:
            with st.spinner("π Analyzing global significance and key learnings..."):
                importance = generate_importance_analysis(knowledgeBase)
                st.subheader("π Why This Research Matters")
                st.markdown("""
                <style>
                .big-font {
                    font-size:18px !important;
                    color: #2e86de;
                }
                .highlight {
                    background-color: #f5f6fa;
                    padding: 10px;
                    border-radius: 5px;
                    border-left: 4px solid #4b7bec;
                }
                </style>
                """, unsafe_allow_html=True)

                st.markdown("""
                <div class="highlight">
                <p class="big-font">This analysis explains why the paper you uploaded is important and what you should learn from it.</p>
                </div>
                """, unsafe_allow_html=True)

                st.markdown(importance, unsafe_allow_html=True)

                st.markdown("""
                <div style="margin-top: 20px; padding: 10px; background-color: #f8f9fa; border-radius: 5px;">
                <h4>π‘ How to Apply This Knowledge</h4>
                <ul>
                <li>Consider how these findings might impact your work or studies</li>
                <li>Think about potential applications in your field</li>
                <li>Identify areas for further research or implementation</li>
                </ul>
                </div>
                """, unsafe_allow_html=True)

        with tab3:
            # NOTE(review): the chain (and its memory) is rebuilt on every
            # Streamlit rerun; consider caching it in st.session_state.
            conversation_chain = init_document_chatbot(knowledgeBase)
            document_chatbot_interface(conversation_chain)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Entry point when executed directly (e.g. via `streamlit run`).
if __name__ == "__main__":
    main()