File size: 4,448 Bytes
a28cf98
99bb200
 
 
 
 
 
 
 
1aa91d1
850f7e7
99bb200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1aa91d1
99bb200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3f369b8
99bb200
 
 
 
 
1aa91d1
99bb200
 
 
 
71ea9d5
99bb200
 
 
 
 
 
3f369b8
128c1ed
3f369b8
fbfc036
99bb200
 
 
 
 
1aa91d1
99bb200
 
 
 
850f7e7
 
 
 
 
 
 
99bb200
850f7e7
 
 
 
 
 
 
 
 
1aa91d1
99bb200
 
 
 
 
a408357
99bb200
a408357
71ea9d5
 
 
99bb200
42cf4fa
850f7e7
 
 
 
 
 
42cf4fa
71ea9d5
0fbc358
19b3b5c
 
 
0fbc358
19b3b5c
0fbc358
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import gradio as gr
from PyPDF2 import PdfReader
import zipfile
import os
import io
import nltk
import openai
import time
import subprocess
import sys
from sentence_transformers import SentenceTransformer, util

# Bootstrap: make sure runtime dependencies are present.
# NOTE(review): these installs run *after* the top-of-file imports, so if the
# packages were truly missing the imports above would already have failed;
# presumably this pattern targets a hosted environment — confirm it is needed.
def _pip_install(args):
    """Best-effort `pip install <args>`: log and continue on failure.

    The imports at the top of this file already succeeded, so the packages
    are usable; a transient installer error must not crash app startup.
    """
    try:
        subprocess.check_call([sys.executable, "-m", "pip", "install"] + args)
    except subprocess.CalledProcessError as e:
        print(f"pip install {' '.join(args)} failed: {e}", file=sys.stderr)

_pip_install(["-r", "requirements.txt"])

def install(package):
    """Install a single *package* via pip (kept for backward compatibility)."""
    _pip_install([package])

# Install PyTorch, transformers, and sentence-transformers
install("torch")
install("transformers")
install("sentence-transformers")

# Download the NLTK tokenizer data used for sentence handling.
nltk.download('punkt')

# Read the OpenAI API key from the 'OpenAPI' environment variable.
# (NOTE(review): the key is NOT hard-coded here, despite the old comment;
# the env var name 'OpenAPI' is unusual — confirm it matches the deployment.)
openai.api_key = os.getenv('OpenAPI')

# Module-level cache of extracted PDF text, keyed by the file-like object
# passed to pdf_to_text.
cache = {}

# Load the pre-trained sentence-embedding model used for similarity scoring.
model = SentenceTransformer('all-MiniLM-L6-v2')

def create_persona(text):
    """Summarize *text* into a factual persona using gpt-3.5-turbo.

    Retries up to 5 times, sleeping 1 second between attempts, on any API
    error. After the final failed attempt the exception message itself is
    returned as the result (preserving the original best-effort contract).
    """
    attempts_left = 5
    while True:
        attempts_left -= 1
        try:
            reply = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[
                    {"role": "system", "content": "You are an expert at summarizing content to provide a factual persona."},
                    {"role": "user", "content": f"Create a persona based on this text: {text}"},
                ],
            )
        except Exception as exc:
            if attempts_left == 0:
                # Out of retries: surface the error text to the caller.
                return str(exc)
            time.sleep(1)
        else:
            return reply['choices'][0]['message']['content']

def call_openai_api(persona, user_prompt, additional_facts):
    """Answer *user_prompt* in the voice of *persona* via gpt-3.5-turbo.

    *additional_facts* is injected into the prompt to steer the answer.
    Retries up to 5 times (1 s apart) on API errors; after the final
    failure the exception message is returned instead of an answer.
    """
    # The prompt inputs do not change between retries, so build the
    # messages once up front.
    system_msg = {"role": "system", "content": f"You are an expert at providing short factual answers."}
    user_msg = {"role": "user", "content": f"""Ignore all previous instructions. As {persona}
 You are James Waddell, you wrote this paper: optimizing the Workplace through Technology. 
Here are some additional facts: {additional_facts}. Now, provide short factual answers as James, focus on the additional facts if provided: {user_prompt}"""}
    attempts_left = 5
    while True:
        attempts_left -= 1
        try:
            reply = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[system_msg, user_msg],
            )
        except Exception as exc:
            if attempts_left == 0:
                # Out of retries: surface the error text to the caller.
                return str(exc)
            time.sleep(1)
        else:
            return reply['choices'][0]['message']['content']

def extract_persona_from_pdf(pdf_file):
    """Read all text from the PDF at path *pdf_file* and build a persona.

    Returns the persona string produced by create_persona (or, per that
    function's contract, an error message if the API call ultimately fails).
    """
    with open(pdf_file, 'rb') as f:
        pdf = PdfReader(f)
        # extract_text() can return None for image-only/empty pages; guard
        # with '' so concatenation never raises TypeError. join() also
        # avoids quadratic string += accumulation.
        aggregated_text = ''.join((page.extract_text() or '') for page in pdf.pages)
    return create_persona(aggregated_text)

# Build the persona once at startup from the bundled persona.pdf
# (assumes persona.pdf ships alongside this script — startup fails if absent).
persona = extract_persona_from_pdf('persona.pdf')

def pdf_to_text(pdf_file_io, user_prompt, persona):
    """Answer *user_prompt* using the text of one PDF plus the OpenAI persona.

    pdf_file_io -- file-like object (e.g. io.BytesIO) containing PDF bytes
    user_prompt -- the user's question
    persona     -- persona description forwarded to call_openai_api
    Returns the model's answer string.
    """
    pdf = PdfReader(pdf_file_io)
    # extract_text() can return None for image-only pages; substitute ''.
    # join() also avoids quadratic string += accumulation.
    aggregated_text = ''.join((page.extract_text() or '') for page in pdf.pages)
    # NOTE(review): keying the cache on the file-like object means entries
    # are effectively never re-read (each call passes a fresh BytesIO) and
    # the dict grows without bound; kept for backward compatibility with any
    # external reader of `cache` — consider keying on content hash instead.
    cache[pdf_file_io] = aggregated_text

    # Embed the question and the full document, then compare.
    query_embedding = model.encode(user_prompt, convert_to_tensor=True)
    text_embedding = model.encode(aggregated_text, convert_to_tensor=True)
    cosine_scores = util.pytorch_cos_sim(query_embedding, text_embedding)

    # Heuristic threshold: if the question is semantically close to the
    # document, tell the model to answer directly from the author's
    # knowledge; otherwise add nothing. (Typo "knoweledge" fixed.)
    if cosine_scores[0][0] > 0.5:
        additional_facts = "Direct answer from author's knowledge: " + user_prompt
    else:
        additional_facts = "No additional information to add."

    answer = call_openai_api(persona, user_prompt, additional_facts)
    return answer

def ask_expert(user_prompt):
    """Answer *user_prompt* using the PDFs bundled in documents.zip.

    Every PDF in the archive is processed in turn and the answer produced
    from the LAST one wins (original behavior preserved). If the archive
    contains no PDFs, a fallback message is returned instead of raising
    NameError on the previously-unbound `result`.
    """
    result = "No PDF documents were found in documents.zip."
    with zipfile.ZipFile("documents.zip", 'r') as z:
        for filename in z.namelist():
            if filename.endswith('.pdf'):
                pdf_file_data = z.read(filename)
                pdf_file_io = io.BytesIO(pdf_file_data)
                result = pdf_to_text(pdf_file_io, user_prompt, persona)
    return result

# Build and launch the Gradio UI: one textbox in (the question), one textbox
# out (the persona's answer).
# NOTE(review): gr.inputs / gr.outputs are the legacy Gradio component
# namespaces (removed in Gradio 4.x) — confirm the pinned gradio version
# still supports them, or migrate to gr.Textbox before upgrading.
iface = gr.Interface(
    fn=ask_expert, 
    inputs=gr.inputs.Textbox(lines=1, placeholder="Enter a question or prompt for the Author", label="User Prompt"), 
    outputs=gr.outputs.Textbox(label="Cognitive Agent Response")
)
iface.launch()