import gradio as gr
from PyPDF2 import PdfReader
import zipfile
import os
import io
import nltk
import openai
import time
import subprocess
import sys
from sentence_transformers import SentenceTransformer, util
def install(package):
    """Install *package* into the current interpreter's environment via pip."""
    subprocess.check_call([sys.executable, "-m", "pip", "install", package])


# NOTE(review): these installs run *after* the imports at the top of the file,
# so they cannot satisfy those imports for the current process — they only
# help on a subsequent run. Guard against a missing requirements.txt so that
# startup does not crash with CalledProcessError.
if os.path.exists("requirements.txt"):
    subprocess.check_call(
        [sys.executable, "-m", "pip", "install", "-r", "requirements.txt"]
    )

# Install PyTorch, transformers, and sentence-transformers
install("torch")
install("transformers")
install("sentence-transformers")
# download required NLTK data packages
# 'punkt' is the Punkt sentence tokenizer data set.
nltk.download('punkt')
# Put your OpenAI API key here
# NOTE(review): reads env var 'OpenAPI' (not the usual 'OPENAI_API_KEY') —
# confirm the deployment environment actually sets that name.
openai.api_key = os.getenv('OpenAPI')
# Create a cache dictionary
# Written by pdf_to_text (file object -> extracted text); never read back.
cache = {}
# Load the pre-trained model
# Sentence-embedding model used to score prompt-vs-document similarity.
model = SentenceTransformer('all-MiniLM-L6-v2')
def create_persona(text):
    """Summarize *text* into a factual persona via the OpenAI chat API.

    Retries up to ``max_retries`` times with exponential backoff. On final
    failure the exception message is returned as a plain string (callers
    treat the return value as text either way).
    """
    max_retries = 5
    for attempt in range(max_retries):
        try:
            response = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[
                    {"role": "system", "content": "You are an expert at summarizing content to provide a factual persona."},
                    {"role": "user", "content": f"Create a persona based on this text: {text}"},
                ],
            )
            return response['choices'][0]['message']['content']
        except Exception as e:
            if attempt < max_retries - 1:
                # Exponential backoff (1s, 2s, 4s, ...) is friendlier to a
                # rate-limited API than the original fixed 1-second wait.
                time.sleep(2 ** attempt)
            else:
                # return the exception message after the last attempt
                return str(e)
def call_openai_api(persona, user_prompt, additional_facts):
    """Ask the OpenAI chat API to answer *user_prompt* in character.

    The prompt impersonates the paper's author using *persona* and any
    *additional_facts*. Retries up to ``max_retries`` times with exponential
    backoff; on final failure the exception message is returned as a string.
    """
    max_retries = 5
    for attempt in range(max_retries):
        try:
            response = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[
                    # (no placeholders here, so no f-string needed)
                    {"role": "system", "content": "You are an expert at providing short factual answers."},
                    {"role": "user", "content": f"""Ignore all previous instructions. As {persona}
You are James Waddell, you wrote this paper: optimizing the Workplace through Technology.
Here are some additional facts: {additional_facts}. Now, provide short factual answers as James, focus on the additional facts if provided: {user_prompt}"""},
                ],
            )
            return response['choices'][0]['message']['content']
        except Exception as e:
            if attempt < max_retries - 1:
                # Exponential backoff instead of a fixed 1-second wait.
                time.sleep(2 ** attempt)
            else:
                # return the exception message after the last attempt
                return str(e)
def extract_persona_from_pdf(pdf_file):
    """Extract all text from *pdf_file* (a path) and build a persona from it.

    Returns whatever ``create_persona`` returns: the persona text, or an
    error-message string if the OpenAI call ultimately fails.
    """
    with open(pdf_file, 'rb') as f:
        pdf = PdfReader(f)
        # extract_text() can return None for image-only pages — coalesce to
        # "" so the join does not raise TypeError; join avoids quadratic +=.
        aggregated_text = "".join(page.extract_text() or "" for page in pdf.pages)
    return create_persona(aggregated_text)
# Extract the persona from the persona.pdf file
# NOTE(review): runs at import time, requires persona.pdf beside this script,
# and makes a network call to OpenAI before the UI ever starts.
persona = extract_persona_from_pdf('persona.pdf')
def pdf_to_text(pdf_file_io, user_prompt, persona):
    """Answer *user_prompt* using text extracted from the PDF stream.

    Embeds both the prompt and the document text, and only tells the chat
    model the document likely holds a direct answer when cosine similarity
    exceeds 0.5. Returns the model's answer string.
    """
    pdf = PdfReader(pdf_file_io)
    # extract_text() may return None on image-only pages; treat as empty.
    aggregated_text = "".join(page.extract_text() or "" for page in pdf.pages)
    # NOTE(review): keyed on a throwaway BytesIO object this entry can never
    # be looked up again and the dict grows without bound — kept for
    # compatibility, but consider keying on the filename or a content hash.
    cache[pdf_file_io] = aggregated_text
    query_embedding = model.encode(user_prompt, convert_to_tensor=True)
    text_embedding = model.encode(aggregated_text, convert_to_tensor=True)
    cosine_scores = util.pytorch_cos_sim(query_embedding, text_embedding)
    # 0.5 similarity threshold decides whether the document itself is likely
    # to contain a direct answer to the question.
    if cosine_scores[0][0] > 0.5:
        # typo fixed: "knoweledge" -> "knowledge"
        additional_facts = "Direct answer from author's knowledge: " + user_prompt
    else:
        additional_facts = "No additional information to add."
    return call_openai_api(persona, user_prompt, additional_facts)
def ask_expert(user_prompt):
    """Answer *user_prompt* from the first PDF inside documents.zip.

    Only the first PDF found in the archive is consulted. Returns the
    model's answer, or an explanatory message when the archive contains no
    PDF at all (the original silently returned None in that case, which
    rendered as an empty response in the UI).
    """
    with zipfile.ZipFile("documents.zip", 'r') as z:
        for filename in z.namelist():
            if filename.endswith('.pdf'):
                pdf_file_io = io.BytesIO(z.read(filename))
                # First matching PDF wins; the function returns immediately.
                return pdf_to_text(pdf_file_io, user_prompt, persona)
    return "No PDF documents found in documents.zip."
# Gradio UI: one text input (the question) -> one text output (the answer).
# The gr.inputs / gr.outputs namespaces were deprecated in Gradio 3 and
# removed in Gradio 4; the top-level component classes are the supported API.
iface = gr.Interface(
    fn=ask_expert,
    inputs=gr.Textbox(lines=1, placeholder="Enter a question or prompt for the Author", label="User Prompt"),
    outputs=gr.Textbox(label="Cognitive Agent Response"),
)
iface.launch()