File size: 2,321 Bytes
daa9788
d03bad1
 
 
a61752d
 
daa9788
d03bad1
 
 
daa9788
a61752d
 
 
1c0acc5
 
a61752d
1c0acc5
 
a61752d
1c0acc5
a61752d
 
 
 
 
 
 
 
 
 
 
d03bad1
 
 
daa9788
d03bad1
 
 
 
 
 
 
 
 
2b883e3
d03bad1
2b883e3
d03bad1
2b883e3
d03bad1
 
 
2b883e3
d03bad1
 
2b883e3
d03bad1
2b883e3
d03bad1
 
 
 
 
 
 
 
2b883e3
d03bad1
 
 
 
 
2b883e3
a61752d
 
2b883e3
a61752d
2b883e3
a61752d
 
 
 
2b883e3
a61752d
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import os
import re
import string

import nltk
import openai
import PyPDF2
import streamlit as st

nltk.download('punkt')

# Set up OpenAI API credentials.
# SECURITY: the key is read from the OPENAI_API_KEY environment variable so
# that no secret lives in source control. Any key that was previously
# committed in this file should be treated as compromised and revoked.
# If the variable is unset this is None and the OpenAI client will reject
# requests at call time.
openai.api_key = os.environ.get("OPENAI_API_KEY")



def read_pdf(fname):
    """Extract the text of every page of a PDF.

    Args:
        fname: Whatever ``PyPDF2.PdfReader`` accepts as its source (the
            script passes the Streamlit uploaded-file object).

    Returns:
        A list with one entry per page, in page order, holding the result
        of ``extract_text()`` for that page.
    """
    pdf = PyPDF2.PdfReader(fname)
    # One extracted-text entry per page, preserving document order.
    return [page.extract_text() for page in pdf.pages]

def clean_text(text):
    """Normalize text: lowercase, strip ASCII punctuation, collapse whitespace.

    Args:
        text: A string, or a list of strings which is first joined with
            single spaces.

    Returns:
        The lowercased text with every character in ``string.punctuation``
        removed and each run of whitespace reduced to one space, with no
        leading or trailing whitespace.
    """
    joined = " ".join(text) if isinstance(text, list) else text
    # Passing the punctuation as the third maketrans argument deletes it.
    drop_punctuation = str.maketrans("", "", string.punctuation)
    words = joined.lower().translate(drop_punctuation).split()
    return " ".join(words)

def generate_keywords(text, num_keywords=6):
    """Ask the OpenAI completion API for keywords and render them in Streamlit.

    Args:
        text: Document text to extract keywords from.
        num_keywords: Number of keywords to request from the model and the
            maximum number displayed. Defaults to 6.

    Returns:
        None. Results are written to the page with ``st.write``. A warning
        is shown instead when ``text`` is empty, and an error message is
        shown when the API call fails.
    """
    cleaned_text = text.strip() if text else ""
    if not cleaned_text:
        # Nothing to send; avoid a pointless (billed) API round-trip.
        st.warning("No text available to generate keywords from.")
        return

    try:
        # NOTE: `best_of` is intentionally not set. It controls how many
        # candidate completions are generated server-side, not how many
        # keywords are returned, so tying it to num_keywords only multiplied
        # token usage.
        response = openai.Completion.create(
            engine="text-davinci-002",
            prompt=f"What are {num_keywords} highly related keywords for the following text?\n{cleaned_text}\n\nKeywords:",
            max_tokens=50,
            n=1,
            stop=None,
            temperature=0.5,
        )
    except openai.error.OpenAIError as err:
        # Covers authentication problems, rate limits and over-long prompts.
        st.error(f"Keyword generation failed: {err}")
        return

    generated_text = response.choices[0].text.strip()

    # The model may answer as a comma-separated line or as a (numbered or
    # bulleted) list, so split on both commas and newlines and strip any
    # leading list marker before keeping the non-empty entries.
    keywords = []
    for piece in re.split(r"[,\n]", generated_text):
        keyword = re.sub(r"^\s*(?:\d+[.)]\s+|[-*\u2022]\s*)", "", piece).strip()
        if keyword:
            keywords.append(keyword)

    st.write("Top Keywords:")
    for position, keyword in enumerate(keywords[:num_keywords], start=1):
        st.write(f"{position}. {keyword}")

def generate_summary(text, summary_length=2):
    """Ask the OpenAI completion API for a summary and render it in Streamlit.

    Args:
        text: Document text to summarize.
        summary_length: Number of sentences requested in the prompt.
            Defaults to 2.

    Returns:
        None. The summary is written to the page one sentence per
        ``st.write`` call. A warning is shown instead when ``text`` is
        empty, and an error message is shown when the API call fails.
    """
    cleaned_text = text.strip() if text else ""
    if not cleaned_text:
        # Nothing to send; avoid a pointless (billed) API round-trip.
        st.warning("No text available to summarize.")
        return

    try:
        response = openai.Completion.create(
            engine="text-davinci-002",
            prompt=f"Please summarize the following text in {summary_length} sentences:\n{cleaned_text}\n\nSummary:",
            max_tokens=100,
            n=1,
            stop=None,
            temperature=0.5,
        )
    except openai.error.OpenAIError as err:
        # Covers authentication problems, rate limits and over-long prompts.
        st.error(f"Summary generation failed: {err}")
        return

    generated_text = response.choices[0].text.strip()
    st.write("Description:")
    # Split into sentences so each one is rendered as its own element.
    for sentence in nltk.sent_tokenize(generated_text):
        st.write(sentence)

# Main Streamlit app: upload a PDF, then generate keywords or a summary on demand.
st.title("Meta Magic")

uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")

if uploaded_file is not None:
    # Extract the per-page text and normalize it into a single string.
    text = clean_text(read_pdf(uploaded_file))

    # One button per action, rendered in this order; a clicked button runs
    # its action on the extracted text.
    button_actions = (
        ("Generate Keywords", generate_keywords),
        ("Generate Summary", generate_summary),
    )
    for label, action in button_actions:
        if st.button(label):
            action(text)