# mybrain / app.py
# Hugging Face Space by diazcalvi
# Last change: "Update app.py" (revision e2d6961)
import os
import gradio as gr
import gradio
from git import Repo
from llama_index import GPTSimpleVectorIndex, SimpleDirectoryReader, ServiceContext,LLMPredictor
from langchain.chat_models import ChatOpenAI
from llama_index.llm_predictor.chatgpt import ChatGPTLLMPredictor
import huggingface_hub
from huggingface_hub import Repository, login, HfApi
from datetime import datetime
import csv
# ---------------------------------------------------------------------------
# Hugging Face credentials and dataset configuration.
#
# SECURITY: access tokens must never be committed to source control. They are
# read from the environment instead (on a Hugging Face Space, define them as
# repository secrets). Any token that was previously hard-coded in this file
# must be considered leaked and should be revoked.
# ---------------------------------------------------------------------------
access_token_read = os.environ.get("HF_TOKEN_READ") or os.environ.get("HF_TOKEN")
access_token_write = os.environ.get("HF_TOKEN_WRITE") or os.environ.get("HF_TOKEN")
if access_token_read:
    # Only log in when a token is configured; calling login() without a token
    # would fall back to an interactive prompt, which a hosted app cannot answer.
    login(token=access_token_read)
# The write token is the one used for cloning with push rights below.
hf_token = access_token_write

DATASET_REPO_URL = "https://huggingface.co/datasets/diazcalvi/mybrain"
DATA_FILENAME = "kion.json"
# NOTE(review): DATA_FILE is still referenced by generate_html() and
# store_message(), but its definition is commented out. Re-enabling it as-is
# would point the CSV message log at the index file, so confirm the intended
# path before restoring it.
#DATA_FILE = os.path.join("vdb", DATA_FILENAME)
HF_TOKEN = hf_token

# Start-up diagnostics: whether a token is available and which hub client runs.
print("is none?", HF_TOKEN is None)
print("hfh", huggingface_hub.__version__)

# Git identity used for the commits created when the index is published.
os.system("git config --global user.name \"diazcalvi\"")
os.system("git config --global user.email \"diazcalvi@gmail.com\"")

# Local working copy of the dataset repository, kept in ./vdb.
repo = Repository(
    local_dir="vdb", clone_from=DATASET_REPO_URL, use_auth_token=hf_token
)

# The serialized index lives inside the working copy; the source documents
# used to build it are read from ./documents.
index_name = "./vdb/" + DATA_FILENAME
documents_folder = "./documents"
print("DATAFILE:" + index_name)
#@st.experimental_memo
#@st.cache_resource
def initialize_index(index_name, documents_folder):
    """Load the vector index from disk, or build, save and publish it.

    Args:
        index_name: Path of the serialized index file.
        documents_folder: Directory handed to ``SimpleDirectoryReader`` when
            no saved index exists yet.

    Returns:
        The loaded or newly built ``GPTSimpleVectorIndex``.
    """
    llm_predictor = LLMPredictor(
        llm=ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo")
    )
    service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor)

    if os.path.exists(index_name):
        # Hand the service context to the index explicitly; it was previously
        # built and then dropped, so the chat model configured above was
        # never the one answering queries.
        return GPTSimpleVectorIndex.load_from_disk(
            index_name, service_context=service_context
        )

    documents = SimpleDirectoryReader(documents_folder).load_data()
    index = GPTSimpleVectorIndex.from_documents(
        documents, service_context=service_context
    )
    index.save_to_disk(index_name)

    user = "diazcalvi"
    # NOTE(review): the resulting Repository object is never used afterwards;
    # the call is kept only so that existing side effects are unchanged.
    # Confirm whether it can be removed.
    Repository(
        user,
        DATASET_REPO_URL,
        token=hf_token,
    )

    # Publish the freshly written index file through the git working copy.
    local_repo_path = "vdb"
    myrepo = Repo(local_repo_path)
    myrepo.git.add(all=True)
    myrepo.git.commit("-m", "commit message")
    myrepo.git.push("--force")
    return index
#@st.experimental_memo
#@st.cache_data(max_entries=200, persist=True)
def query_index(_index, query_text):
    """Run ``query_text`` against ``_index`` and return the answer as a string.

    Args:
        _index: Any object exposing a ``query(text)`` method.
        query_text: The question or prompt forwarded to the index.

    Returns:
        ``str()`` of whatever the index's ``query`` call returns.
    """
    return str(_index.query(query_text))
def generate_html(data_file=None) -> str:
    """Render the stored messages as an HTML fragment, newest first.

    Args:
        data_file: Path of the CSV file to read. It must start with a header
            row that provides the ``name`` and ``message`` columns. When
            omitted, the module-level ``DATA_FILE`` is used.

    Returns:
        ``"no messages yet"`` when the file has no data rows; otherwise a
        ``<div class='chatbot'>`` element with one child ``<div>`` per message
        in reverse file order.
    """
    from html import escape

    path = DATA_FILE if data_file is None else data_file
    # newline="" lets the csv module handle line endings inside quoted fields.
    with open(path, newline="") as csvfile:
        rows = list(csv.DictReader(csvfile))

    if not rows:
        return "no messages yet"

    parts = ["<div class='chatbot'>"]
    for row in reversed(rows):
        # Names and messages are user-supplied, so escape them before they are
        # embedded in markup. str() keeps short rows (whose missing fields are
        # None) rendering the way they did before.
        parts.append("<div>")
        parts.append(f"<span>{escape(str(row['name']))}</span>")
        parts.append(f"<span class='message'>{escape(str(row['message']))}</span>")
        parts.append("</div>")
    parts.append("</div>")
    return "".join(parts)
def store_message(name: str, message: str):
    """Append a message to the CSV log and push the dataset repository.

    Nothing is written or pushed unless both ``name`` and ``message`` are
    non-empty.

    Args:
        name: Author of the message.
        message: Message text.

    Returns:
        The value returned by ``repo.push_to_hub()`` when the message was
        stored, or ``None`` when ``name`` or ``message`` was empty.
    """
    # Bound up front so the empty-input path returns None instead of raising
    # UnboundLocalError at the return statement.
    commit_url = None
    if name and message:
        print(DATA_FILE)
        print(DATA_FILENAME)
        print(DATASET_REPO_URL)
        # newline="" stops the csv module's own line terminators from being
        # translated a second time on platforms that rewrite "\n".
        with open(DATA_FILE, "a", newline="") as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=["name", "message", "time"])
            writer.writerow(
                {"name": name, "message": message, "time": str(datetime.now())}
            )
        commit_url = repo.push_to_hub()
        print(commit_url)
    return commit_url
def greet(text):
    """Wrap the user's question in the expert prompt and query the index.

    Args:
        text: The question typed into the Gradio text box.

    Returns:
        The string produced by ``query_index`` for the wrapped prompt.
    """
    prefix = "Act as a Context expert and answer this with detail:"
    suffix = ". (Include the context reference details, file name, page number, and date if available)"
    prompt = prefix + text + suffix
    return query_index(index, prompt)
# Index shared with greet(); it stays None when no OpenAI key is configured.
index = None

# SECURITY: the OpenAI key is read from the environment and must never be
# committed to source control. A key that was previously hard-coded in this
# file must be considered leaked and should be revoked.
api_key = os.environ.get("OPENAI_API_KEY")
if api_key:
    index = initialize_index(index_name, documents_folder)

if index is None:
    # This used to call st.warning(), but streamlit is never imported in this
    # file, so that branch raised NameError. Report on stdout instead.
    print("Please set the OPENAI_API_KEY environment variable first.")

# Single text-in / text-out Gradio UI backed by greet().
gradio_interface = gradio.Interface(
    fn=greet,
    inputs="text",
    outputs="text",
    examples=[
        ["What can I ask you? Give me 10 different examples."]
    ],
    title="AI KB",
    description="Enter a query about your data",
    article="© Carlos Diaz Calvi 2023"
)
gradio_interface.launch()