File size: 11,678 Bytes
0e23dfd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e4c46a4
 
0e23dfd
6707114
0e23dfd
6707114
0e23dfd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e4c46a4
 
0e23dfd
e4c46a4
 
 
 
 
 
 
 
 
 
 
 
 
 
0e23dfd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
import os
import time
import matplotlib.pyplot as plt
from wordcloud import WordCloud
from dotenv import load_dotenv
from PyPDF2 import PdfReader
import streamlit as st
from pptx import Presentation
from pptx.util import Inches
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQA, ConversationalRetrievalChain
from langchain_groq import ChatGroq
from langchain.memory import ConversationBufferMemory

from gtts import gTTS

def load_groq_api_key():
    """Return the Groq API key taken from the ``GROQ_API_KEY`` environment variable.

    Raises:
        ValueError: If the variable is unset or holds an empty string.
    """
    api_key = os.getenv("GROQ_API_KEY")
    if api_key:
        return api_key
    raise ValueError("Error: GROQ_API_KEY not found in environment variables.")


# 🔹 Process Text (Split & Embed)
def process_text(text):
    """Split *text* into overlapping chunks and index them in a FAISS store.

    The text is split on newlines into chunks of up to 3000 characters
    (measured with ``len``) with a 500-character overlap, embedded with the
    ``sentence-transformers/all-MiniLM-L6-v2`` model, and stored in FAISS.

    Args:
        text: The full document text.

    Returns:
        The FAISS vector store built from the chunks.

    Raises:
        ValueError: If *text* is empty/whitespace-only or yields no chunks.
    """
    # Fail early with a clear message: with nothing to embed, the vector
    # store cannot be built and the failure would surface far from its cause.
    if not text or not text.strip():
        raise ValueError("No text to process: the document appears to be empty.")

    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=3000,
        chunk_overlap=500,
        length_function=len
    )
    chunks = text_splitter.split_text(text)
    if not chunks:
        raise ValueError("No text to process: the document produced no chunks.")

    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    knowledgeBase = FAISS.from_texts(chunks, embeddings)

    return knowledgeBase


# 🔹 Generate Structured Summary
def generate_summary(knowledgeBase):
    """Ask the LLM for a structured, bullet-point summary of the indexed paper.

    Args:
        knowledgeBase: Vector store exposing ``as_retriever()``.

    Returns:
        The ``'result'`` entry of the retrieval-QA chain's response.
    """
    summary_llm = ChatGroq(
        model_name="llama3-8b-8192",
        groq_api_key=os.getenv("GROQ_API_KEY"),
        temperature=0.1,
    )
    qa_chain = RetrievalQA.from_chain_type(
        llm=summary_llm,
        retriever=knowledgeBase.as_retriever(),
    )

    prompt = (
        "Summarize the research paper in a structured format, covering objective, "
        "proposed model, methods, evaluation, comparison, and key results. "
        "Keep it concise and clear, using bullet points."
    )
    return qa_chain.invoke({"query": prompt})['result']


# 🔹 Generate Importance Analysis
def generate_importance_analysis(knowledgeBase):
    """Ask the LLM why the indexed paper matters and what readers should learn.

    Args:
        knowledgeBase: Vector store exposing ``as_retriever()``.

    Returns:
        The ``'result'`` entry of the retrieval-QA chain's response.
    """
    focus_points = (
        "1. The global significance of this research",
        "2. Potential real-world applications",
        "3. Key takeaways for readers",
        "4. How it advances the field",
    )
    prompt = (
        "Analyze why this research paper is important for the world and what readers should learn from it. "
        "Focus on:\n"
        + "\n".join(focus_points)
        + "\nPresent in clear, concise bullet points with emojis for better readability."
    )

    analysis_llm = ChatGroq(
        model_name="llama3-70b-8192",
        groq_api_key=os.getenv("GROQ_API_KEY"),
        temperature=0.2,
    )
    qa_chain = RetrievalQA.from_chain_type(
        llm=analysis_llm,
        retriever=knowledgeBase.as_retriever(),
    )
    return qa_chain.invoke({"query": prompt})['result']


# 🔹 Initialize Document Chatbot
def init_document_chatbot(knowledgeBase):
    """Build a conversational retrieval chain over the indexed document.

    Args:
        knowledgeBase: Vector store exposing ``as_retriever()``.

    Returns:
        A ``ConversationalRetrievalChain`` ("stuff" chain type) backed by a
        ``ConversationBufferMemory`` stored under the ``chat_history`` key.
    """
    chat_history = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
    chat_llm = ChatGroq(
        model_name="llama3-8b-8192",
        groq_api_key=os.getenv("GROQ_API_KEY"),
        temperature=0.2
    )
    doc_retriever = knowledgeBase.as_retriever()

    chatbot = ConversationalRetrievalChain.from_llm(
        llm=chat_llm,
        retriever=doc_retriever,
        memory=chat_history,
        chain_type="stuff"
    )
    return chatbot




def text_to_speech(text):
    """Synthesize *text* to an MP3 file with gTTS and return the file's path.

    Args:
        text: The text to read aloud (English).

    Returns:
        Path of the generated MP3 file, or ``None`` when *text* is blank,
        synthesis fails, or the file was not created. Failures are printed,
        never raised, so callers must handle a ``None`` result.
    """
    import tempfile  # local import: only this function needs it

    # Nothing to speak: skip the synthesis attempt entirely.
    if not text or not text.strip():
        return None

    # Use the platform temp directory (``/tmp`` on typical Linux hosts such as
    # Hugging Face Spaces) instead of a hard-coded POSIX path.
    audio_path = os.path.join(tempfile.gettempdir(), "summary_audio.mp3")

    try:
        # Create audio file with gTTS
        gTTS(text=text, lang='en').save(audio_path)
    except Exception as e:
        # Best-effort feature: report the failure and let the caller carry on.
        print(f"Error in gTTS: {e}")
        return None

    # Verify file was created
    if not os.path.exists(audio_path):
        print("Error in gTTS: Audio file not created")
        return None
    return audio_path


# 🔹 Generate WordCloud
def generate_wordcloud(text):
    """Render a word cloud of *text* and display it in the Streamlit app.

    The image is written to ``wordcloud.png`` in the working directory and
    then shown with ``st.image``. If the text has no usable words, an info
    message is shown instead of raising.

    Args:
        text: The text whose words are plotted.
    """
    try:
        wordcloud = WordCloud(width=800, height=400, background_color="white").generate(text)
    except ValueError:
        # WordCloud raises ValueError when there is not a single word to plot.
        st.info("Not enough text to build a word cloud.")
        return

    fig = plt.figure(figsize=(10, 5))
    try:
        ax = fig.add_subplot(111)
        ax.imshow(wordcloud, interpolation="bilinear")
        ax.axis("off")
        fig.savefig("wordcloud.png", bbox_inches="tight")
    finally:
        # pyplot keeps every figure alive until it is closed; without this a
        # new figure would accumulate on each call.
        plt.close(fig)

    st.image("wordcloud.png", caption="πŸ”  WordCloud of Important Keywords", use_container_width=True)


# 🔹 Convert Summary to PowerPoint
def generate_ppt(summary):
    """Convert a text summary into a PowerPoint deck and return its filename.

    The deck starts with a title slide, followed by one "Title and Content"
    slide per section of *summary*. Sections are separated by blank lines;
    the first non-blank line of a section becomes the slide title and the
    remaining lines become the slide body. Blank sections are skipped.

    Args:
        summary: The summary text.

    Returns:
        The filename of the saved presentation (``summary_presentation.pptx``,
        written to the working directory).
    """
    import re  # local import: only this function needs it

    prs = Presentation()

    # Add Title Slide
    title_slide = prs.slides.add_slide(prs.slide_layouts[0])
    title_slide.shapes.title.text = "Research Paper Summary"

    # Add Content Slides
    content_layout = prs.slide_layouts[1]  # Title and Content Layout
    # Treat any blank line (even one holding stray spaces) as a section break.
    for section in re.split(r"\n\s*\n", summary or ""):
        lines = [line for line in section.split("\n") if line.strip()]
        if not lines:
            # Leading/trailing or repeated blank lines would otherwise
            # produce empty slides.
            continue

        slide = prs.slides.add_slide(content_layout)
        slide.shapes.title.text = lines[0].strip()  # First line as title
        slide.shapes.placeholders[1].text = "\n".join(lines[1:])  # Remaining as bullet points

    # Save PowerPoint File
    ppt_filename = "summary_presentation.pptx"
    prs.save(ppt_filename)
    return ppt_filename


# 🔹 Display PDF Info for Engagement
def display_pdf_info(text, pdf_reader):
    """Show basic statistics about the uploaded PDF in the Streamlit app.

    Displays the page count, the whitespace-delimited word count, and a
    preview made of the first 50 words, plus an expander with static tips.

    Args:
        text: The text extracted from the PDF.
        pdf_reader: Reader object exposing the document's ``pages``.
    """
    page_total = len(pdf_reader.pages)
    words = text.split()
    preview = " ".join(words[:50]) + "..."

    st.subheader("πŸ“„ PDF Insights")
    st.write(f"πŸ“ **Total Pages:** {page_total}")
    st.write(f"πŸ”’ **Word Count:** {len(words)}")
    st.write(f"πŸ“Œ **First Few Lines:** {preview}")

    insights_panel = st.expander("πŸ” **View More Insights**")
    with insights_panel:
        st.write("πŸ’‘ **Pro Tip:** LLaMA-3 can summarize large documents in seconds! πŸš€")
        st.info(
            "πŸ“– Research papers are typically structured into sections like Abstract, Introduction, Methods, and Results. AI captures these key elements!")


# 🔹 Document Chatbot Interface
def document_chatbot_interface(conversation_chain):
    """Render the document chat UI and answer one user question per run.

    The chat transcript is kept in ``st.session_state.messages``. When the
    user submits a question it is sent to *conversation_chain*; answers that
    look like "I don't know" / "not mentioned" are replaced by a fixed
    out-of-scope notice. Failures are logged and shown as a generic error.

    Args:
        conversation_chain: Chain accepting ``{"question": ...}`` via
            ``invoke`` and returning a mapping with an ``"answer"`` entry.
    """
    import logging  # local import: only this function needs it

    st.subheader("πŸ’¬ Document Chatbot")
    st.warning(
        "This chatbot only answers questions about the uploaded document. It won't respond to general questions.")

    # Initialize chat history
    if "messages" not in st.session_state:
        st.session_state.messages = []
        st.session_state.messages.append({
            "role": "assistant",
            "content": "Ask me anything about the research paper you uploaded! For example:\n\n"
                       "β€’ What is the main objective of this research?\n"
                       "β€’ Can you explain the methodology used?\n"
                       "β€’ What were the key findings?\n"
                       "β€’ How does this compare to previous work?"
        })

    # Display chat messages from history
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    # Accept user input
    if prompt := st.chat_input("Ask about the research paper..."):
        # Add user message to chat history
        st.session_state.messages.append({"role": "user", "content": prompt})
        # Display user message in chat message container
        with st.chat_message("user"):
            st.markdown(prompt)

        # Display assistant response in chat message container
        with st.chat_message("assistant"):
            with st.spinner("Thinking..."):
                try:
                    # ``invoke`` matches how the other chains in this file are called.
                    response = conversation_chain.invoke({"question": prompt})
                    answer = response["answer"]
                except Exception:
                    # Keep the UI message generic, but record the real cause
                    # so failures can be diagnosed from the logs.
                    logging.getLogger(__name__).exception("Chatbot failed to answer a question")
                    st.error("Sorry, I encountered an error processing your question. Please try again.")
                    st.session_state.messages.append({"role": "assistant", "content": "Error processing request"})
                else:
                    # Check if answer is relevant to document (both markers
                    # are matched case-insensitively).
                    lowered = answer.lower()
                    if "i don't know" in lowered or "not mentioned" in lowered:
                        answer = "This information is not covered in the document. Please ask questions specifically about the research paper content."

                    st.markdown(answer)
                    st.session_state.messages.append({"role": "assistant", "content": answer})


# 🔹 Main Streamlit App
def main():
    """Streamlit entry point: upload a PDF, then summarize, analyze and chat.

    Flow: resolve the Groq API key, read the uploaded PDF, build the vector
    store, then render three tabs (summary with audio / word cloud / PPT
    download, importance analysis, and a document chatbot). Returns early
    with an on-page error when the API key is missing or the PDF contains no
    extractable text.
    """
    st.title("πŸ“„ Advanced Research Paper Analyzer")
    st.write("πŸš€ Powered by LLaMA-3 on Groq - Understand why research matters and what you should learn")
    st.divider()

    # Pick up a local .env file, if any, before looking for the key; the
    # helper was imported for this purpose but never invoked.
    load_dotenv()

    try:
        os.environ["GROQ_API_KEY"] = load_groq_api_key()
    except ValueError as e:
        st.error(str(e))
        return

    pdf = st.file_uploader("πŸ“€ Upload your Research Paper (PDF)", type="pdf")

    if pdf is not None:
        with st.spinner("πŸ”„ Extracting text & analyzing PDF... Please wait!"):
            pdf_reader = PdfReader(pdf)
            # Extract each page once (extraction is the expensive step) and
            # drop pages that yield no text.
            page_texts = (page.extract_text() for page in pdf_reader.pages)
            text = "".join(page_text for page_text in page_texts if page_text)

            # Scanned / image-only PDFs yield no text; nothing can be indexed.
            if not text.strip():
                st.error("No extractable text was found in this PDF. It may be a scanned or image-only document.")
                return

            knowledgeBase = process_text(text)

            display_pdf_info(text, pdf_reader)

        st.success("βœ… PDF processed successfully! Now generating insights...")

        # Create tabs for different analysis sections
        tab1, tab2, tab3 = st.tabs(["πŸ“œ Summary", "🌍 Why This Matters", "πŸ’¬ Chat with Paper"])

        with tab1:
            with st.spinner("🧠 Generating comprehensive summary..."):
                response = generate_summary(knowledgeBase)
                st.subheader("πŸ“œ Structured Summary:")
                st.markdown(response, unsafe_allow_html=True)

                # Audio Conversion (text_to_speech returns None on failure)
                audio_file = text_to_speech(response)
                if audio_file:
                    st.audio(audio_file, format="audio/mp3")
                else:
                    st.warning("Audio narration is unavailable for this summary.")

                # WordCloud Generation
                generate_wordcloud(response)

                # PowerPoint Conversion
                ppt_file = generate_ppt(response)
                with open(ppt_file, "rb") as file:
                    st.download_button(label="πŸ“₯ Download Summary PPT", data=file, file_name="Research_Summary.pptx")

        with tab2:
            with st.spinner("πŸ” Analyzing global significance and key learnings..."):
                importance = generate_importance_analysis(knowledgeBase)
                st.subheader("🌍 Why This Research Matters")
                st.markdown("""
                <style>
                    .big-font {
                        font-size:18px !important;
                        color: #2e86de;
                    }
                    .highlight {
                        background-color: #f5f6fa;
                        padding: 10px;
                        border-radius: 5px;
                        border-left: 4px solid #4b7bec;
                    }
                </style>
                """, unsafe_allow_html=True)

                st.markdown("""
                <div class="highlight">
                    <p class="big-font">This analysis explains why the paper you uploaded is important and what you should learn from it.</p>
                </div>
                """, unsafe_allow_html=True)

                st.markdown(importance, unsafe_allow_html=True)

                st.markdown("""
                <div style="margin-top: 20px; padding: 10px; background-color: #f8f9fa; border-radius: 5px;">
                    <h4>πŸ’‘ How to Apply This Knowledge</h4>
                    <ul>
                        <li>Consider how these findings might impact your work or studies</li>
                        <li>Think about potential applications in your field</li>
                        <li>Identify areas for further research or implementation</li>
                    </ul>
                </div>
                """, unsafe_allow_html=True)

        with tab3:
            # Streamlit re-executes the script on every interaction, so a
            # chain created here each time would start with empty memory on
            # every question. Keep one chain per uploaded file in the session.
            chain_key = (pdf.name, pdf.size)
            if st.session_state.get("conversation_chain_key") != chain_key:
                st.session_state.conversation_chain = init_document_chatbot(knowledgeBase)
                st.session_state.conversation_chain_key = chain_key
                # A different document starts a fresh transcript.
                st.session_state.pop("messages", None)
            document_chatbot_interface(st.session_state.conversation_chain)




# Start the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()