# testv3 / app.py — Hugging Face Space file (page-scrape residue, commented
# out so the module parses): "garfieldmaitland's picture", "Update app.py",
# commit 0a0eb8f verified
import gradio as gr
from huggingface_hub import InferenceClient
# -*- coding: utf-8 -*-
"""princeton_p3v2.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1c2inNv6X080nW27nK4BAIroRQfPAEOOW
"""
# Cell 1: System and Python dependencies.
# NOTE(review): the original used IPython shell escapes here
# ("!apt-get install tesseract-ocr" and "!pip install -r requirements.txt"),
# which are a SyntaxError in a plain .py file and only work inside a
# notebook. On Hugging Face Spaces, declare "tesseract-ocr" in packages.txt
# and the Python packages in requirements.txt — both are installed
# automatically at build time, so no install code belongs here.
# Cell 2: Import required libraries
import os
import re
import sys
import openai
from textblob import TextBlob
from langchain.chains import ConversationalRetrievalChain, RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.indexes import VectorstoreIndexCreator
from langchain.indexes.vectorstore import VectorStoreIndexWrapper
from langchain.llms import OpenAI
from langchain.vectorstores import Chroma
import gradio as gr
# Cell 3: Set up API key.
# SECURITY: the original hard-coded a live-looking OpenAI secret key here.
# Never commit credentials to source control — that key must be revoked.
# Read the key from the environment instead (on Hugging Face Spaces, add
# OPENAI_API_KEY as a repository secret; langchain/openai pick it up from
# the environment automatically).
if not os.environ.get("OPENAI_API_KEY"):
    print(
        "WARNING: OPENAI_API_KEY is not set; OpenAI calls will fail.",
        file=sys.stderr,
    )

# Cell 4: Define constants and settings
PERSIST = False  # set True to cache/reuse the Chroma index under ./persist
# TODO: Update this to your actual data directory
DATA_DIRECTORY = "/content/drive/MyDrive/Colab_Notebooks/princeton_p3/"
# Cell 5: Create or load the vector index.
# Reuse a previously persisted Chroma index when PERSIST is enabled and the
# "persist" directory already exists; otherwise build a fresh index from the
# project CSV in DATA_DIRECTORY.
reuse_persisted = PERSIST and os.path.exists("persist")
if reuse_persisted:
    print("Reusing index...\n")
    store = Chroma(
        persist_directory="persist", embedding_function=OpenAIEmbeddings()
    )
    index = VectorStoreIndexWrapper(vectorstore=store)
else:
    csv_loader = DirectoryLoader(DATA_DIRECTORY, glob="princeton_p3_dataset.csv")
    # Persisting and in-memory builds differ only in the creator kwargs.
    creator_kwargs = (
        {"vectorstore_kwargs": {"persist_directory": "persist"}}
        if PERSIST
        else {"embedding": OpenAIEmbeddings()}
    )
    index = VectorstoreIndexCreator(**creator_kwargs).from_loaders([csv_loader])
# Cell 6: Build the conversational retrieval chain.
# gpt-3.5-turbo-16k provides a larger context window for the retrieved
# chunks; the retriever supplies the top-5 most similar documents per query.
_retriever = index.vectorstore.as_retriever(search_kwargs={"k": 5})
chain = ConversationalRetrievalChain.from_llm(
    llm=ChatOpenAI(model="gpt-3.5-turbo-16k"),
    retriever=_retriever,
)
# Cell 7: Define the personalized answer function
def get_personalized_answer(query):
    """Answer *query* through the retrieval chain with a persona prefix.

    Each call passes an empty chat history, so turns are independent even
    though a ConversationalRetrievalChain is used.

    Args:
        query: The user's question as plain text.

    Returns:
        The chain's answer string.
    """
    # Frame the question so the model answers in the assistant persona.
    personalized_query = f"As a helpful agent about princetonadi, {query}"
    result = chain({"question": personalized_query, "chat_history": []})
    # The original wrapped this in f"{answer}" — a no-op; return it directly.
    return result["answer"]
# Cell 8: Create and launch the Gradio interface.
question_box = gr.Textbox(
    lines=2, placeholder="Ask me anything about optimizing your life..."
)
sample_questions = [
    ["What is Princeton ADI?"],
    ["When does the P3 program take place?"],
    ["What are the core values of Princeton?"],
    ["Which students were in the student 2023 spotlight?"],
    ["How can current and prospective students engage with Princeton ADI?"],
]
iface = gr.Interface(
    fn=get_personalized_answer,
    inputs=question_box,
    outputs="text",
    title="Princeton ADI | GenAI Model",
    description=(
        "Your personal AI assistant to help optimize your life and achieve "
        "your goals. (powered by GPT-3.5 Turbo)"
    ),
    examples=sample_questions,
    css="footer {display:none !important}",
    theme=gr.themes.Default(),
)
iface.launch(share=True, show_api=False)