Spaces:

MrAlvaroA
/

GL-Project3

Runtime error

App Files Files Community

GL-Project3 / app.py

MrAlvaroA

Update app.py

bc684f0 verified over 1 year ago

raw

history blame contribute delete

6.14 kB

	import os
	import openai
	import pandas as pd
	import gradio as gr
	import uuid
	import json

	from pathlib import Path
	from huggingface_hub import CommitScheduler, HfApi
	from openai import OpenAI
	from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
	from langchain_community.vectorstores import Chroma

	#-------------------------------------------------------------------------------------
	# Folder whose contents the log scheduler uploads to the dataset repo.
	_LOG_FOLDER = Path("logs/")

	# Shared scheduler, created on first use by _get_log_scheduler().
	_log_scheduler = None


	def _read_template(path):
	    """Return the full text of the prompt template stored at *path*."""
	    with open(path, 'r', encoding='utf-8') as file:
	        return file.read()


	def _get_log_scheduler():
	    """Return the shared CommitScheduler, creating it on the first call.

	    Building a scheduler per request would start a new background upload
	    job every time, so a single instance is reused for the process.
	    """
	    global _log_scheduler
	    if _log_scheduler is None:
	        _log_scheduler = CommitScheduler(
	            repo_id="GL-Project3_Logs",
	            repo_type="dataset",
	            folder_path=_LOG_FOLDER,
	            path_in_repo="data",
	            every=2,
	            token=hf_token
	        )
	    return _log_scheduler


	def get_answer(question, quotes, temperature, document):
	    """Validate a question, retrieve quotes for it and stream the answer.

	    This is a generator. Each yielded item is a 3-tuple of
	    ``(status message, answer text, question text)``.

	    Args:
	        question: The user's question.
	        quotes: How many quotes to retrieve from the vector store.
	        temperature: Sampling temperature for the answering request.
	        document: File name of the document to search; it is prefixed
	            with ``/content/dataset/`` to build the metadata filter.

	    Yields:
	        Progress updates, then either the final answer or a "Stopped"
	        message explaining why no answer was produced.
	    """
	    # Guard clause: without a question and a document there is nothing to
	    # search, and concatenating None into the filter path would raise.
	    if not question or not question.strip() or not document:
	        yield (
	            "Stopped: Missing input",
	            "Please select a document and enter a question.",
	            question or "",
	        )
	        return

	    yield "Running... Analyzing Question", "", question

	    question_analysis = _read_template('./templates/question_analysis.txt')
	    question_analysis_template = _read_template('./templates/question_analysis_template.txt')

	    q_analysis = [
	        {"role": "system", "content": question_analysis},
	        {"role": "user", "content": question_analysis_template.format(
	            question=question,
	        )},
	    ]

	    try:
	        response = client.chat.completions.create(
	            model=model_name,
	            messages=q_analysis,
	            max_tokens=2000,
	            temperature=0.0
	        )
	    except openai.OpenAIError as e:
	        print(f"An error occurred: {str(e)}")
	        # Report the failure so the UI does not stay on a "Running..." status.
	        yield (
	            "Stopped: Error",
	            "The request to the language model failed. Please try again.",
	            question,
	        )
	        return

	    # Tolerate missing content and surrounding whitespace in the verdict.
	    verdict = (response.choices[0].message.content or "").strip()
	    if verdict != "Valid Question.":
	        yield "Stopped: Question Analysis Done", "The question is not valid, stopping the process", ""
	        return

	    yield "Running... Question Analysis Done", "", question

	    qna = _read_template('./templates/qna.txt')
	    qna_template = _read_template('./templates/qna_template.txt')

	    filename = "/content/dataset/" + document

	    # int(): the slider value may arrive as a float, while k is a count.
	    retrieved = vector_db.similarity_search(
	        question, k=int(quotes), filter={"source": filename}
	    )

	    context_for_query = "".join(
	        f"Quote {i}:\n{d.page_content}\n(Page = {d.metadata.get('page', 'Unknown')})\n\n"
	        for i, d in enumerate(retrieved, start=1)
	    )

	    answer_to_analyze = [
	        {"role": "system", "content": qna},
	        {"role": "user", "content": qna_template.format(
	            context=context_for_query,
	            question=question
	        )},
	    ]

	    yield "Running... Getting best answer from AI", "", question

	    try:
	        answer_analyzed = client.chat.completions.create(
	            model=model_name,
	            messages=answer_to_analyze,
	            max_tokens=2000,
	            temperature=temperature
	        )
	    except openai.OpenAIError as e:
	        print(f"An error occurred: {str(e)}")
	        yield (
	            "Stopped: Error",
	            "The request to the language model failed. Please try again.",
	            question,
	        )
	        return

	    answer_text = answer_analyzed.choices[0].message.content
	    yield "Stopped... Process Finished", answer_text, ""

	    # Runs after the final yield: write one JSON line describing this
	    # exchange into the folder the scheduler uploads.
	    scheduler = _get_log_scheduler()
	    log_file = _LOG_FOLDER / f"data_{uuid.uuid4()}.json"

	    # Hold the scheduler lock so the file is not uploaded half-written.
	    with scheduler.lock:
	        with log_file.open("a") as f:
	            f.write(json.dumps(
	                {
	                    'user_input': question,
	                    'retrieved_context': context_for_query,
	                    'model_response': answer_text
	                }
	            ))
	            f.write("\n")
	#-------------------------------------------------------------------------------------

	# Credentials are read from the environment; os.getenv returns None when a
	# variable is not set.
	hf_token = os.getenv("HF_TOKEN")
	openai_api = os.getenv("OPENAI_API_KEY")

	client = OpenAI(
	    api_key=openai_api
	)

	model_name = 'gpt-3.5-turbo'
	embedding_model = SentenceTransformerEmbeddings(model_name="thenlper/gte-large")
	vectordb_location = './companies-10K-2023_db1'
	collection_name = 'companies-10K-2023'

	# Open the persisted Chroma collection that get_answer() searches.
	vector_db = Chroma(
	    collection_name=collection_name,
	    embedding_function=embedding_model,
	    persist_directory=vectordb_location
	)

	# Only metadata is requested; it is used to list the stored documents.
	stored_documents = vector_db.get(include=["metadatas"])
	sources = set()  # Never populated; kept so the module-level name still exists.

	# Collect the distinct file names of all stored chunks. Entries without a
	# 'source' are skipped: a placeholder name would show up as a dropdown
	# choice that can never match the retrieval filter.
	# NOTE(review): get_answer() rebuilds the path as "/content/dataset/" + name,
	# so this presumably assumes every source lives in that folder - confirm.
	document_names = {
	    os.path.basename(metadata['source'])
	    for metadata in (stored_documents['metadatas'] or [])
	    if metadata and metadata.get('source')
	}

	# Sorted so the dropdown order is the same on every start-up.
	document_list = sorted(document_names)

	#-------------------------------------------------------------------------------------
	# Gradio front end: inputs in two columns on top, then the action button,
	# then the answer box.
	with gr.Blocks() as demo:
	    gr.Markdown("GL - Project 3: RAG")

	    with gr.Row():
	        # Left column: which document to search and what to ask.
	        with gr.Column(scale=1):
	            document_dropdown = gr.Dropdown(choices=document_list, label="Document")
	            question_input = gr.Textbox(
	                label="Enter your question",
	                placeholder="Type your question here...",
	            )

	        # Right column: retrieval size and sampling temperature.
	        with gr.Column(scale=1):
	            quotes_to_fetch = gr.Slider(
	                minimum=1, maximum=10, step=1,
	                label="How many quotes you want from the source",
	            )
	            temperature_slider = gr.Slider(
	                minimum=0, maximum=1, step=0.1,
	                label="Temperature",
	                info="Controls randomness: 0 = deterministic, 1 = creative/unexpected answers. If you can't get an answer try increasing the temperature."
	            )

	    with gr.Row():
	        fetch_answer = gr.Button("Analyze and Answer")

	    with gr.Row():
	        answer_output = gr.Textbox(
	            label="Answer",
	            placeholder="Your answer will be displayed here..."
	        )

	    # get_answer yields (status, answer, question) tuples; the status is
	    # sent to the button itself, so progress appears as the button's text.
	    fetch_answer.click(
	        fn=get_answer,
	        inputs=[question_input, quotes_to_fetch, temperature_slider, document_dropdown],
	        outputs=[fetch_answer, answer_output, question_input],
	    )

	demo.launch(share=True, show_error=True, debug=True)