File size: 2,461 Bytes
e3fd674
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import streamlit as st
import pdfplumber
import cohere
import os
from dotenv import load_dotenv

# Load settings from a .env file (if present) before reading the API key.
load_dotenv()

# Shared Cohere client used by the summarization and QA helpers below.
# The key is read with a plain subscript, so a missing COHERE_API_KEY
# raises KeyError right here at start-up.
_cohere_api_key = os.environ["COHERE_API_KEY"]
co = cohere.Client(_cohere_api_key)

# Text Extraction from PDF
def extract_text_from_pdf(pdf_file):
    """Extract the text of every page of a PDF and return it as one string.

    Args:
        pdf_file: Whatever ``pdfplumber.open`` accepts; the app passes the
            file object produced by the Streamlit uploader.

    Returns:
        The text of all pages in page order, joined with newlines. Pages
        that yield no text are skipped, so a PDF without any extractable
        text returns ``""``.
    """
    with pdfplumber.open(pdf_file) as pdf:
        # extract_text() may give None (or "") for a page with no text layer,
        # e.g. a scanned image; concatenating None would raise TypeError.
        page_texts = [page.extract_text() for page in pdf.pages]
    # Join with a newline so the last word of one page is not fused with the
    # first word of the next.
    return "\n".join(chunk for chunk in page_texts if chunk)

# Summarize Text Using Cohere
def summarize_text(text, section="main points"):
    """Summarize a passage of a research paper with the Cohere generate API.

    Args:
        text: The passage to summarize. It is inserted into the prompt
            verbatim, so the caller decides how much text to send.
        section: Label placed in the prompt to describe which part of the
            paper ``text`` represents.

    Returns:
        The first generation returned by the API with surrounding whitespace
        stripped, or ``""`` when ``text`` is empty or whitespace-only.
    """
    # Nothing to summarize: skip the API call rather than asking the model to
    # summarize an empty passage.
    if not text or not text.strip():
        return ""

    prompt = f"Summarize the following {section} of a research paper in 3-4 sentences: {text}"
    response = co.generate(
        model="command-xlarge-nightly",  # Use the appropriate model
        prompt=prompt,
        max_tokens=150
    )
    return response.generations[0].text.strip()

# Question-Answering Using Cohere
def answer_question(text, question):
    """Answer a question about a passage using the Cohere generate API.

    Args:
        text: The context the answer should be based on. It is inserted into
            the prompt verbatim.
        question: The question to answer.

    Returns:
        The first generation returned by the API with surrounding whitespace
        stripped, or ``""`` when either ``text`` or ``question`` is empty or
        whitespace-only.
    """
    # Without a context or a question there is nothing to ground an answer
    # in, so skip the API call instead of sending a hollow prompt.
    if not text or not text.strip():
        return ""
    if not question or not question.strip():
        return ""

    prompt = f"Using the following text, answer the question accurately:\n\nText: {text}\n\nQuestion: {question}"
    response = co.generate(
        model="command-xlarge-nightly",
        prompt=prompt,
        max_tokens=150
    )
    return response.generations[0].text.strip()

# Streamlit App
st.title("ResearchDigest: Multi-Paper Summarizer & QA System")
st.subheader("Upload research papers to summarize them and ask questions.")

uploaded_files = st.file_uploader(
    "Upload Research Papers (PDF)",
    type=["pdf"],
    accept_multiple_files=True,
)

if uploaded_files:
    # Streamlit re-executes this script on every widget interaction (typing a
    # question, pressing the button). Keep extracted texts and summaries in
    # session state so PDFs are not re-parsed and summaries are not
    # re-requested from the API on each rerun.
    for state_key in ("pdf_texts", "pdf_summaries"):
        if state_key not in st.session_state:
            st.session_state[state_key] = {}
    cached_texts = st.session_state["pdf_texts"]
    cached_summaries = st.session_state["pdf_summaries"]

    all_texts = {}

    for uploaded_file in uploaded_files:
        # Name plus size identifies an upload well enough for a per-session
        # cache; a different file with the same name and size would reuse
        # the cached text.
        cache_key = (uploaded_file.name, uploaded_file.size)
        if cache_key not in cached_texts:
            with st.spinner(f"Processing {uploaded_file.name}..."):
                # Extract text from each uploaded file
                cached_texts[cache_key] = extract_text_from_pdf(uploaded_file)
        all_texts[uploaded_file.name] = cached_texts[cache_key]

    # Display Summaries for Each Paper
    st.subheader("Summaries")
    for file_name, text in all_texts.items():
        excerpt = text[:1000]  # Summarize first 1000 characters
        if not excerpt.strip():
            # Typically a scanned/image-only PDF: there is nothing to send.
            st.write(f"**{file_name}**")
            st.warning("No extractable text was found in this PDF, so it cannot be summarized.")
            continue
        # Summaries are cached by the exact excerpt that was summarized.
        if excerpt not in cached_summaries:
            with st.spinner(f"Summarizing {file_name}..."):
                cached_summaries[excerpt] = summarize_text(excerpt)
        st.write(f"**{file_name}**")
        st.write(cached_summaries[excerpt])

    # Add QA System
    st.subheader("Ask Questions")
    selected_file = st.selectbox("Select a paper to ask questions about:", list(all_texts))
    question = st.text_input("Enter your question:")

    if st.button("Get Answer"):
        if not question.strip():
            # Tell the user why nothing happened instead of silently ignoring
            # the click.
            st.warning("Please enter a question first.")
        elif not all_texts[selected_file].strip():
            st.warning("The selected PDF has no extractable text to answer from.")
        else:
            with st.spinner("Generating answer..."):
                answer = answer_question(all_texts[selected_file], question)
            st.write(f"**Answer:** {answer}")