Recipes-AI

Runtime error

App Files Files Community

Recipes-AI / app.py

tonyliu404

Update app.py

b2bc3f5 verified about 1 year ago

raw

history blame contribute delete

5.4 kB

	import numpy as np # linear algebra
	import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
	import pprint
	import os
	import ast
	import gradio as gr
	from gradio.themes.base import Base
	import weaviate
	from weaviate.embedded import EmbeddedOptions
	from langchain_community.vectorstores import Weaviate
	from langchain.prompts import ChatPromptTemplate
	from langchain.text_splitter import RecursiveCharacterTextSplitter
	from langchain_community.embeddings import HuggingFaceEmbeddings
	from langchain.schema import Document
	from langchain_community.chat_models import ChatOpenAI
	from langchain.schema.runnable import RunnablePassthrough
	from langchain.schema.output_parser import StrOutputParser
	from langchain_core.messages import HumanMessage, SystemMessage

	# Fail fast with a clear message when the API key is missing. Assigning the
	# result of os.getenv() straight back into os.environ is a no-op when the
	# variable is set and raises an opaque TypeError when it is not, because
	# environment values must be str.
	if not os.getenv("OPENAI_API_KEY"):
	    raise RuntimeError(
	        "OPENAI_API_KEY is not set; export it (or add it as a secret) "
	        "before starting the app."
	    )

	# Recipe dataset, read from the current working directory.
	df = pd.read_csv('./RAW_recipes.csv')

	# Variables
	max_length = 231637  # total number of recipes aka rows
	curr_len = 10000  # how many rows we want to process and embed

	# Labels paired positionally with the values of each row's nutrition list.
	# The first entry is reported "per serving"; the rest as "% daily value".
	nutrition_map = ["Calorie", "Total Fat", 'Sugar', 'Sodium', 'Protein', 'Saturated Fat', 'Total Carbohydrate']


	def _format_recipe(row):
	    """Flatten one recipe row into a single-line string for embedding.

	    Args:
	        row: sequence of exactly 12 values, unpacked positionally as
	            name, id, minutes, contributor_id, submitted, tags, nutrition,
	            n_steps, steps, description, ingredients, n_ingredients.
	            ``nutrition`` and ``steps`` must be strings holding Python
	            list literals.

	    Returns:
	        The recipe text with all carriage returns and newlines removed.

	    Raises:
	        ValueError: if ``row`` does not hold exactly 12 values, or if
	            ``nutrition`` / ``steps`` is not a valid literal.
	        SyntaxError: if ``nutrition`` / ``steps`` cannot be parsed.
	    """
	    # `recipe_id` instead of `id` so the builtin is not shadowed.
	    (name, recipe_id, minutes, contributor_id, submitted, tags, nutrition,
	     n_steps, steps, description, ingredients, n_ingredients) = row

	    # convert the stringified lists back to real lists
	    # (literal_eval only accepts literals, so it never executes code)
	    nutrition = ast.literal_eval(nutrition)
	    steps = ast.literal_eval(steps)

	    # format nutrition
	    nutrition_labeled = [
	        f"{label} : {num} per serving" if label == "Calorie"
	        else f"{label} : {num} % daily value"
	        for label, num in zip(nutrition_map, nutrition)
	    ]

	    # format steps: prefix each one with its 1-based position
	    steps = [f"{position}. {step}" for position, step in enumerate(steps, start=1)]

	    return f'''
	{name} : {minutes} minutes, submitted on {submitted}
	description: {description},
	ingredients: {ingredients}
	number of ingredients: {n_ingredients}
	tags: {tags}, nutrition: {nutrition_labeled}, total steps: {n_steps}
	steps: {steps}
	'''.replace("\r", "").replace("\n", "")


	# Concatenate each of the first `curr_len` rows into one string.
	# head() caps the row count without a manual counter, and
	# itertuples(index=False, name=None) yields plain tuples in column order.
	recipe_info = [
	    _format_recipe(row)
	    for row in df.head(curr_len).itertuples(index=False, name=None)
	]


	# Splitter configured with chunk_size=1500 and chunk_overlap=150.
	text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=150)

	# Wrap every recipe string in a Document and split it; `docs` ends up
	# holding one list of chunks per recipe.
	docs = [
	    text_splitter.split_documents([Document(page_content=recipe_text)])
	    for recipe_text in recipe_info
	]

	# Flatten the per-recipe chunk lists into one list.
	merged_documents = [piece for pieces in docs for piece in pieces]

	# Embedding model: a Hugging Face sentence-transformers checkpoint,
	# configured to run on the CPU.
	model_name = "sentence-transformers/all-MiniLM-L6-v2"
	model_kwargs = dict(device='cpu')
	embeddings = HuggingFaceEmbeddings(model_name=model_name, model_kwargs=model_kwargs)

	# Weaviate client created with EmbeddedOptions.
	client = weaviate.Client(embedded_options=EmbeddedOptions())

	# Build the vector store from the chunked recipe documents using the
	# embeddings object above (by_text=False).
	vector_search = Weaviate.from_documents(
	    documents=merged_documents,
	    embedding=embeddings,
	    client=client,
	    by_text=False,
	)


	# Instantiate Weaviate Vector Search as a retriever

	# Basic RAG.
	# k: number of documents requested from the retriever (10).
	# score_threshold: relevance cut-off handed over in search_kwargs (0.77).
	# NOTE(review): a score threshold is normally honoured by
	# search_type="similarity_score_threshold"; confirm it has any effect
	# with search_type="mmr".
	k = 10
	score_threshold = 0.77

	retriever = vector_search.as_retriever(
	    search_kwargs={"k": k, "score_threshold": score_threshold},
	    search_type="mmr",
	)

	# Prompt sent to the chat model; {context} receives the retriever output
	# and {question} receives the user's query unchanged.
	template = """
	You are an assistant for question-answering tasks.
	Use the following pieces of retrieved context to answer the question at the end.
	The following pieces of retrieved context are recipes.
	If you don't know the answer, just say that you don't know. Don't try to make up an answer.
	Dont say anthing mean or offensive.

	Context: {context}

	Question: {question}
	"""

	custom_rag_prompt = ChatPromptTemplate.from_template(template)

	# Chat model used to produce the answer (temperature 0.2).
	llm = ChatOpenAI(
	    model_name="gpt-3.5-turbo",
	    temperature=0.2,
	)

	# Regular chain format: chain = prompt | model | output_parser
	# The steps are composed with the plain `|` operator; the backslash-escaped
	# form (`\|`) is not valid Python.
	rag_chain = (
	    {"context": retriever, "question": RunnablePassthrough()}
	    | custom_rag_prompt
	    | llm
	    | StrOutputParser()
	)


	def get_response(query):
	    """Run *query* through ``rag_chain`` and return whatever the chain produces."""
	    answer = rag_chain.invoke(query)
	    return answer


	# Gradio front end: a header, a question box, a submit button and an answer box.
	# NOTE(review): the Row and Column below are laid out as siblings inside the
	# Blocks context; confirm this matches the intended layout.
	with gr.Blocks(theme=Base(), title="RAG Recipe AI") as demo:
	    # Static description shown at the top of the page.
	    gr.Markdown("""
	# RAG Recipe AI

	This model will answer all your recipe-related questions.
	Enter a question about a recipe, and the system will return an answer based on 10,000 food.com recipes stored in the vector database. \n
	Features Considered: \n
	\t - Cook Time
	\t - Nutrition Information
	\t - Steps
	\t - Ingredients
	\t - Dish Description
	Sample Queries: \n
	\t - What is an easy dessert I can make with apples?
	\t - What is the nutritional information of a Caesar salad?
	\t - How many calories is in an average American burger?
	""")
	    question_box = gr.Textbox(label="Question:")
	    with gr.Row():
	        submit_button = gr.Button("Submit", variant="primary")
	    with gr.Column():
	        answer_box = gr.Textbox(lines=1, max_lines=10, label="Answer:")
	    # Clicking Submit sends the question to get_response and shows the result.
	    submit_button.click(fn=get_response, inputs=question_box, outputs=[answer_box])

	demo.launch()