In [4]:
from langchain.document_loaders import DirectoryLoader,PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [5]:
def load_docs(directory_path):
    """Load every PDF found under ``directory_path``.

    The directory is searched with the glob ``**/*.pdf`` and each match is
    read with ``PyPDFLoader``.

    Args:
        directory_path: Directory to search for PDF files.

    Returns:
        Whatever ``DirectoryLoader.load()`` produces for the matched files.
    """
    pdf_loader = DirectoryLoader(
        directory_path,
        glob="**/*.pdf",
        loader_cls=PyPDFLoader,
    )
    return pdf_loader.load()

In [6]:
# Load the PDFs found under the relative "data" directory (see load_docs).
doc = load_docs("data")

In [4]:
# Spot-check one loaded entry: print its metadata, then its extracted text.
print(doc[10].metadata)
print(doc[10].page_content)

{'source': 'data\\a16-ahn.pdf', 'page': 10}
16:10 J. Ahn et al.
Our system uses a commercial image labeling Web service, called IQEngines
[IQEngines 2013], to determine the user’s initial starting position when using our
grocery shopping application. IQEngines uses a combination of computer vision and
crowdsourcing to tag a photo with a label describing the content of the image. For
example, an image of a box of Frosted Cheerios cereal might be labeled “General Mills
Frosted Cheerios”. When an image is submitted to IQEngines, the image is first pro-
cessed by a computer vision system in an effort to provide an accurate label. If the
computer vision system cannot identify the image, then IQEngines passes the image to
its crowdsourcing network for analysis and tagging. According to IQEngines, the time
to return a label for an image varies from a few seconds for the computer vision sys-
tem, to a few minutes for the crowdsourcing system. To ensure fast image labeling in
our experiments, w

In [5]:
def make_chunks(docs, chunk_size=1000, chunk_overlap=200):
    """Split documents into overlapping, character-measured chunks.

    Args:
        docs: Documents to split; passed straight to ``split_documents``.
        chunk_size: Maximum chunk length as measured by ``len``. Defaults to
            1000, the value that used to be hard-coded.
        chunk_overlap: Number of characters shared between neighbouring
            chunks. Defaults to 200, the value that used to be hard-coded.

    Returns:
        The chunks returned by ``RecursiveCharacterTextSplitter.split_documents``.
        Because ``add_start_index=True`` is set, each chunk's metadata also
        carries a ``start_index`` entry.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
        add_start_index=True,
    )
    return text_splitter.split_documents(docs)

In [6]:
# Split the loaded documents into chunks, then display one chunk as a sanity check.
chunks = make_chunks(doc)
chunks[10]

Document(metadata={'source': 'data\\a16-ahn.pdf', 'page': 3, 'start_index': 0}, page_content='Supporting Healthy Grocery Shopping via Mobile Augmented Reality 16:3\nAR tag grows in size. The tags when clicked reveal nutritional information about the\nproduct. The tags are also colored, for example, green to indicate products that are\nnutritionally preferable (e.g., low-calorie, gluten-free), and red to indicate products to\navoid (e.g., high cholesterol or peanut content). Further, shoppers can specify health\nprofiles which may impact their food purchase choices, such as weight control, heart\ndisease, food allergies, etc. The recommended products shown via AR tags will then\nchange depending on what health condition/concern is indicated by the user. We believe\nour system is the first to integrate augmented reality tagging and pedometry-based\nlocalization with a back-end server to provide health-based grocery recommendations\nat the point-of-purchase. We evaluated the effectiveness

In [7]:
# Display the following chunk; in the recorded output it starts with text that
# also ends chunks[10], i.e. the overlap between neighbouring chunks.
chunks[11]

Document(metadata={'source': 'data\\a16-ahn.pdf', 'page': 3, 'start_index': 754}, page_content='localization with a back-end server to provide health-based grocery recommendations\nat the point-of-purchase. We evaluated the effectiveness of our system in a real gro-\ncery store aisle with 15 actual grocery shopping subjects to determine how easy and\nfast the subjects reported it was to locate healthy food products and avoid unhealthy\nones, using AR tagging with our application. We also evaluated our application’s func-\ntionality and performance by analyzing data we collected from 104 online application\ndemonstration/survey participants.\n2. RELATED WORK\nAugmented reality has been recently applied in the mobile health arena in a variety\nof applications. For example, AR tags are overlaid in a home environment to pro-\nvide instructions to the elderly for tasks like taking medication, cooking, washing,\netc. [Herv´as et al. 2011]. TriggerHunter is an AR-based game that overlays tags

In [1]:
# from langchain.embeddings import HuggingFaceEmbeddings,OpenAIEmbeddings
from langchain.embeddings.huggingface import HuggingFaceInferenceAPIEmbeddings
# import chromadb
# from chromadb.utils import embedding_functions
from langchain_chroma import Chroma
from chromadb import Client
from chromadb.config import Settings



In [2]:
from dotenv import load_dotenv
import os

# Load environment variables via python-dotenv (typically from a local .env file).
load_dotenv()

# Hugging Face API token; os.getenv returns None when HF_TOKEN is not set.
HF_TOKEN = os.getenv('HF_TOKEN')

In [3]:
def save_embeddings(chunks, embedding_function, CHROMA_PATH="chroma_db", collection_name="my_collection"):
    """Embed ``chunks`` and store them in a persistent Chroma collection.

    Args:
        chunks: Non-empty, sized collection of documents to embed and store.
        embedding_function: Embedding object handed to ``Chroma.from_documents``.
        CHROMA_PATH: Directory in which the Chroma database is persisted.
        collection_name: Name of the collection to write to. Defaults to
            "my_collection", the value that used to be hard-coded. Readers
            must open the same collection to see these documents.

    Returns:
        The ``Chroma`` vector store that was created, so callers can query it
        without reopening the database.

    Raises:
        ValueError: If ``chunks`` is empty. Failing here is clearer than
            reporting "Saved 0 chunks" and leaving an empty store behind.
    """
    if len(chunks) == 0:
        raise ValueError("No chunks to embed: `chunks` is empty.")

    vector_store = Chroma.from_documents(
        documents=chunks,
        embedding=embedding_function,
        persist_directory=CHROMA_PATH,
        collection_name=collection_name,
    )
    # persist() is intentionally not called: with persist_directory set, recent
    # Chroma releases are expected to write to disk on their own.
    # TODO confirm against the installed langchain_chroma / chromadb version.
    print(f"Saved {len(chunks)} chunks to {CHROMA_PATH}.")
    return vector_store


In [4]:

# Define Chroma database path
# NOTE(review): save_embeddings defaults to "chroma_db" when no path is passed,
# whereas this notebook-level path is "chroma" — pass CHROMA_PATH explicitly.
CHROMA_PATH = "chroma"

# Initialize Hugging Face embedding
# Built with the HF_TOKEN API key and the all-MiniLM-L6-v2 sentence-transformers
# model; this object is used both when saving and when querying embeddings.
hugging_face_ef = HuggingFaceInferenceAPIEmbeddings(
 api_key=HF_TOKEN,
 model_name="sentence-transformers/all-MiniLM-L6-v2"
)


In [None]:
# save_embeddings(chunks, hugging_face_ef,CHROMA_PATH)

Saved 162 chunks to chroma_db.


 chroma.persist()


In [5]:
# Open the Chroma store persisted at CHROMA_PATH with the same embedding object.
# NOTE(review): no collection_name is passed here, while save_embeddings writes
# to "my_collection" — confirm which collection this is meant to read.
db = Chroma(
 persist_directory=CHROMA_PATH,
 embedding_function=hugging_face_ef
 )

In [10]:
# Inspect stored records together with their embedding vectors.
# NOTE(review): no ids or filter are given, so this appears to return every
# record — consider limiting what is displayed.
db.get(include=['embeddings'])

{'ids': ['e34aab38-8069-440e-8723-d1cedfe48b93',
 '3fa98ab3-e075-4b88-8165-48096c60ca70',
 '1e771030-2db0-4cea-97ad-9d8afdbff741',
 '3fbb6928-c00a-4a6b-8aa0-de2e7bbca3e4',
 '056aba92-2c01-410c-be0e-424b2d52167e',
 '5fac6c9a-fc34-479a-bda9-a8ea03876e68',
 '992ad3e3-15e9-4ba1-a9c8-7e7db6fb8907',
 '056f4dce-6e4e-4801-8601-e664382060e2',
 'e5841c9e-c287-45c2-a0af-7a76c1a5dd98',
 'e926c90d-9e5f-4b57-8531-b2bb96134716',
 '2bf05434-f855-4234-bccb-bb33931e06c8',
 '9380898c-140d-46e5-955a-2d415d911c98',
 'e6f6fd3f-6bb8-4118-9b43-516e1ffbc550',
 'f13fa58c-16d0-41dd-8c3d-f3cba6d637c1',
 'dd52bec5-97bb-4ef1-b0bd-8a7d7acc190a',
 '45e50f14-f4b6-4eb1-8fdd-a177c897cbfe',
 'cad78450-a41c-4f3b-bf4f-370a05d8d1b9',
 'c33cadd3-dcfd-4c24-b990-5c96f41e55d3',
 'f6b9b2cb-a955-49f5-b14e-93a25e27f432',
 'da6ad6f4-87b5-423a-afa6-8d7d30f6bd58',
 'f9453981-a5ab-4aa4-bede-2ce6c52d5d19',
 '540db486-e8dd-4606-9c1d-f132e3ab8be1',
 '03604131-da4e-432b-a9fa-c0f0dbd95489',
 '649f67a5-2cd3-419a-a8a8-b4f07353d1f7',
 '8288e8

In [6]:
def get_top_docs(query, k=4, collection_name=None):
    """Return the ``k`` stored chunks most relevant to ``query``.

    The Chroma store at the module-level ``CHROMA_PATH`` is opened on every
    call, using the module-level ``hugging_face_ef`` to embed the query.

    Args:
        query: Query text to search for.
        k: Number of results to request.
        collection_name: Collection to search. ``None`` (the default) keeps
            the previous behaviour of using the store's default collection.
            save_embeddings writes to "my_collection", so pass that name to
            query documents saved by it.

    Returns:
        The result of ``similarity_search_with_relevance_scores``: a list of
        ``(document, relevance_score)`` pairs.
    """
    store_kwargs = {
        "persist_directory": CHROMA_PATH,
        "embedding_function": hugging_face_ef,
    }
    # Only override the collection when asked, so existing callers keep
    # reading from the same collection as before.
    if collection_name is not None:
        store_kwargs["collection_name"] = collection_name

    store = Chroma(**store_kwargs)
    return store.similarity_search_with_relevance_scores(query, k=k)
 

In [7]:

# Query to run against the vector store. The interactive input() variant is
# kept commented out; a fixed sample query is used instead.
# query = input("Enter your query: ")
query = "what is FoodKG how it uses AR ?"

# Retrieve matches with get_top_docs' default k (4).
result = get_top_docs(query)

In [8]:
# Display the retrieved (document, score) pairs.
result

[(Document(metadata={'file_id': '0', 'session_id': 't-Sdfp5aqCsRH4hrsbTNRZ5EvskxunVBspFm9fU_xRM', 'source': 'uploads\\ar_influence_on_market.md', 'start_index': 3201}, page_content='Sources admit that, despite the problems in AR, it has uses and future directions.\n\nAR can be used to create interactive menus in restaurants so that customers can view three-dimensional visualizations of the dishes and the lists of ingredients in detail [4]. It may enhance customer engagement and possibly your customer choice.\n\nAR can be embedded in food packaging so that consumers get a rich interaction experience [4]. Scanning the packaging shall unlock for users information regarding the nutritional data, the origin, and the process of manufacture along with virtual representations of the food. Such enhanced transparency and engagement might impact the purchasing decision.\n\nAR applications can be further developed to aid in new food product development [4]. Developers can easily identify possible 

In [None]:
import google.generativeai as genai
import os

# Configure the Gemini client; os.environ[...] raises KeyError when
# GOOGLE_API_KEY is not set.
genai.configure(api_key=os.environ["GOOGLE_API_KEY"])
# Model handle used by generate_response.
model = genai.GenerativeModel("gemini-1.5-flash")

 from .autonotebook import tqdm as notebook_tqdm


In [26]:
def generate_response(query, k=3, min_relevance=0.6):
    """Answer ``query`` with the Gemini model, using retrieved chunks as context.

    Retrieves the top ``k`` chunks via ``get_top_docs``, builds a prompt that
    restricts the model to that context, streams the answer to stdout and
    finally prints the sources of the chunks that were used.

    Args:
        query: The user's question.
        k: Number of chunks to retrieve as context. Defaults to 3, the value
            that used to be hard-coded.
        min_relevance: If the first result's relevance score is below this
            threshold, a warning is printed before answering. Defaults to
            0.6, the value that used to be hard-coded.

    Returns:
        None. All output is printed.
    """
    PROMPT_TEMPLATE = """
Answer the question given based only on the context given below.
context:
{context}

---
based on the context above answer the following query:
{query}
"""
    retrieved = get_top_docs(query, k=k)

    # Without any retrieved chunk there is no context to answer from; the old
    # code raised IndexError on retrieved[0] in this situation.
    if not retrieved:
        print("No documents were retrieved for this query, so no answer was generated.")
        return

    # Each result is a (document, relevance score) pair; the first one is
    # treated as the top match.
    if retrieved[0][1] < min_relevance:
        print("Query is not relevant to the provided material. So the output might not be accurate.")

    context = "\n".join(document.page_content for document, _score in retrieved)
    prompt = PROMPT_TEMPLATE.format(context=context, query=query)

    print("Query:", query)
    print("Response:", end=" ")
    # One streaming request only; an additional non-streaming request used to
    # be sent first and its result thrown away.
    for chunk in model.generate_content(prompt, stream=True):
        print(chunk.text, end="", flush=True)
    print()

    # Always cite the source; add the page number only when the metadata has
    # one. Entries without a page used to be rendered as empty strings.
    citations = []
    for document, _score in retrieved:
        metadata = document.metadata
        citation = str(metadata.get("source", "unknown source"))
        if "page" in metadata:
            # Stored page numbers are shown one higher, as before.
            citation += f" [Page:{int(metadata['page']) + 1}]"
        citations.append(citation)
    print("Sources:" + ",".join(citations))

 

In [27]:

# Read a query from standard input and print the generated answer and its sources.
generate_response(input())


Query is not relevant to the provided material. So the output might not be accurate.
Query: how ar affect consumer behaivour
Response: Based on the provided text, AR's influence on consumer behavior is mixed. While studies show potential for AR to encourage healthier food choices through personalized suggestions and ingredient substitutions, its effectiveness is lower than traditional methods like FOP labeling. A gap exists between intentions to buy healthier products (indicated by AR apps) and actual behavior in real-world shopping situations. The complexity and ease of use of AR applications are significant hurdles to overcome in order to maximize their impact on consumer behavior.

Sources:data\review_paper.pdf [Page:7],data\review_paper.pdf [Page:6],data\review_paper.pdf [Page:6]
