# askexpert_v / app.py
# Author: cogcorp — "Update app.py" (commit c839cd9)
import gradio as gr
from PyPDF2 import PdfReader
import zipfile
import os
import io
import nltk
import openai
import time
import subprocess
import sys
from sentence_transformers import SentenceTransformer, util
# install required libraries
# NOTE(review): this runs AFTER the imports above — if requirements.txt is
# meant to provide PyPDF2 / nltk / openai / sentence-transformers, the
# install happens too late for this process. Presumably relies on the
# Space/container already having them (or on a restart) — confirm.
subprocess.check_call([sys.executable, "-m", "pip", "install", "-r", "requirements.txt"])
def install(package):
    """Install *package* into the current interpreter's environment via pip.

    Raises subprocess.CalledProcessError if pip exits non-zero.
    """
    pip_cmd = [sys.executable, "-m", "pip", "install", package]
    subprocess.check_call(pip_cmd)
# Install PyTorch, transformers, and sentence-transformers
# NOTE(review): same timing caveat as the requirements.txt install above —
# torch/sentence-transformers were already imported at the top of the file.
install("torch")
install("transformers")
install("sentence-transformers")
# download required NLTK data packages
# 'punkt' is the sentence/word tokenizer model; downloaded at import time.
nltk.download('punkt')
# Put your OpenAI API key here
# Read from the environment variable literally named 'OpenAPI' (not
# 'OPENAI_API_KEY') — on Hugging Face Spaces this is a repo secret.
openai.api_key = os.getenv('OpenAPI')
# Create a cache dictionary
# Maps a PDF's BytesIO object -> its extracted text (written in pdf_to_text).
cache = {}
# Load the pre-trained model
# Sentence-embedding model used to score prompt/document similarity.
model = SentenceTransformer('all-MiniLM-L6-v2')
def create_persona(text):
    """Summarize *text* into a factual persona description via the OpenAI chat API.

    Retries up to 5 times (sleeping 1s between attempts) on any exception.
    Returns the model's reply on success; after the final failed attempt,
    returns the exception message as a string instead of raising.
    """
    attempts_remaining = 5
    while True:
        try:
            reply = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[
                    {"role": "system", "content": "You are an expert at summarizing content to provide a factual persona."},
                    {"role": "user", "content": f"Create a persona based on this text: {text}"},
                ],
            )
        except Exception as exc:
            attempts_remaining -= 1
            if attempts_remaining == 0:
                # out of retries: surface the error text to the caller
                return str(exc)
            time.sleep(1)  # brief pause before the next attempt
        else:
            return reply['choices'][0]['message']['content']
def call_openai_api(persona, user_prompt, additional_facts):
    """Ask the OpenAI chat API to answer *user_prompt* in character.

    The prompt pins the assistant to the James Waddell SME persona and
    injects *additional_facts* for grounding. Retries up to 5 times with a
    1-second pause; after the last failed attempt the exception message is
    returned as the answer string instead of raising.
    """
    attempts_remaining = 5
    while True:
        try:
            reply = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[
                    {"role": "system", "content": f"You are an expert at providing short factual answers."},
                    {"role": "user", "content": f"""Ignore all previous instructions. As {persona}
You are James Waddell, you are the SME for The case for Microsoft Viva as an employee experience platform.
Here are some additional facts: {additional_facts}. Now, provide short factual answers as James, focus on the additional facts if provided: {user_prompt}"""},
                ],
            )
        except Exception as exc:
            attempts_remaining -= 1
            if attempts_remaining == 0:
                # out of retries: surface the error text to the caller
                return str(exc)
            time.sleep(1)  # brief pause before the next attempt
        else:
            return reply['choices'][0]['message']['content']
def extract_persona_from_pdf(pdf_file):
    """Extract all text from the PDF at path *pdf_file* and build a persona.

    Parameters:
        pdf_file: filesystem path to a PDF file.

    Returns:
        Whatever create_persona() returns — the persona text on success, or
        an API error message string on failure.

    Raises:
        FileNotFoundError if *pdf_file* does not exist.
    """
    with open(pdf_file, 'rb') as f:
        # extract_text() returns None for pages with no text layer (e.g.
        # scanned images); treat those as empty instead of raising TypeError.
        # ''.join avoids the quadratic cost of repeated string +=.
        aggregated_text = ''.join(
            page.extract_text() or '' for page in PdfReader(f).pages
        )
    return create_persona(aggregated_text)
# Extract the persona from the persona.pdf file
# Runs at import time: persona.pdf must sit next to app.py, and this makes
# a live OpenAI call during startup (persona may hold an error string if
# the API call ultimately fails — see create_persona).
persona = extract_persona_from_pdf('persona.pdf')
def pdf_to_text(pdf_file_io, user_prompt, persona):
    """Answer *user_prompt* in the voice of *persona*, grounded in one PDF.

    Extracts all text from the PDF, measures the semantic similarity between
    the prompt and the document with sentence embeddings, and forwards an
    "additional facts" hint to the OpenAI helper accordingly.

    Parameters:
        pdf_file_io: binary file-like object (e.g. io.BytesIO) holding a PDF.
        user_prompt: the user's question.
        persona: persona description produced by create_persona().

    Returns:
        The model's answer string (or an API error message string).
    """
    # extract_text() can return None for image-only pages; coalesce to ''.
    # ''.join also avoids quadratic string concatenation.
    aggregated_text = ''.join(
        page.extract_text() or '' for page in PdfReader(pdf_file_io).pages
    )
    # NOTE(review): keyed by the BytesIO object itself, so entries can never
    # be re-used and the dict grows on every call — confirm this cache is
    # still wanted, or key it by filename/content hash instead.
    cache[pdf_file_io] = aggregated_text
    query_embedding = model.encode(user_prompt, convert_to_tensor=True)
    text_embedding = model.encode(aggregated_text, convert_to_tensor=True)
    cosine_scores = util.pytorch_cos_sim(query_embedding, text_embedding)
    # Similarity above 0.5: treat the question as covered by the document.
    if cosine_scores[0][0] > 0.5:
        # fixed typo in the prompt fragment: "knoweledge" -> "knowledge"
        additional_facts = "Direct answer from author's knowledge: " + user_prompt
    else:
        additional_facts = "No additional information to add."
    return call_openai_api(persona, user_prompt, additional_facts)
def ask_expert(user_prompt):
    """Answer *user_prompt* using the PDFs bundled in documents.zip.

    Each .pdf member of the archive is processed in turn; as in the
    original flow, the answer derived from the LAST PDF in the archive is
    the one returned.

    Fix: previously, an archive with no .pdf members left `result`
    unassigned and raised NameError; now a fallback message is returned.

    Raises:
        FileNotFoundError / zipfile.BadZipFile if documents.zip is missing
        or not a valid zip archive.
    """
    result = "No PDF documents were found in documents.zip."
    with zipfile.ZipFile("documents.zip", 'r') as z:
        for filename in z.namelist():
            if filename.endswith('.pdf'):
                pdf_file_io = io.BytesIO(z.read(filename))
                result = pdf_to_text(pdf_file_io, user_prompt, persona)
    return result
# Build the single-textbox Gradio UI around ask_expert and start the server.
# NOTE(review): gr.inputs / gr.outputs is the legacy (pre-4.x) Gradio API —
# confirm the pinned gradio version before upgrading these to gr.Textbox.
iface = gr.Interface(
    fn=ask_expert,
    inputs=gr.inputs.Textbox(lines=1, placeholder="Enter a question or prompt for the Author", label="User Prompt"),
    outputs=gr.outputs.Textbox(label="Cognitive Agent Response")
)
iface.launch()