# Chatbot / generate_embeddings.py
# Author: Priya-0914 (commit 19a9438, verified)
from metadata_filtering import build_filters_from_query
from llama_index.llms.openai import OpenAI
import os
from llama_index.vector_stores.qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from llama_index.core import VectorStoreIndex
from qdrant_client.models import PayloadSchemaType
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from perplexity import get_response_from_perplexity
from reranker import reranker
#from Evaluation import evaluation
def query_knowledge_base(index, user_query, perplexity_key, cohere_key):
    """Answer an AI/ML question by combining knowledge-base retrieval with web context.

    Pipeline: fetch external context from Perplexity, retrieve + rerank
    nodes from the vector index (with metadata filters derived from the
    query), then ask an OpenAI LLM to synthesize a final answer from both
    context sources.

    Args:
        index: A LlamaIndex ``VectorStoreIndex`` (e.g. from ``connect_tovector_store``).
        user_query: The user's question as a string.
        perplexity_key: API key forwarded to ``get_response_from_perplexity``.
        cohere_key: API key forwarded to the Cohere ``reranker``.

    Returns:
        The LLM completion object produced by ``llm.complete`` for the final prompt.
    """
    llm = OpenAI(model="gpt-4o-mini")

    # External (web) context — used only as a supplement in the prompt below.
    perplexity_context = get_response_from_perplexity(user_query, perplexity_key)

    # Cohere reranker is applied as a postprocessor over the top-25 hits.
    cohere_rerank3 = reranker(cohere_key)

    # Metadata filters inferred from the query text (see metadata_filtering).
    filters = build_filters_from_query(user_query)

    query_engine = index.as_query_engine(
        similarity_top_k=25,
        node_postprocessors=[cohere_rerank3],
        filters=filters,
    )
    response = query_engine.query(user_query)

    # Concatenate the reranked source nodes into a single context string.
    retrieved_context = "\n\n".join(
        node.node.get_content()
        for node in response.source_nodes
    )

    final_prompt = f"""
You are an AI Tutor specialized in Artificial Intelligence, Machine Learning, Deep Learning, Large Language Models, and related subfields.
You must answer ONLY questions related to AI/ML and its subfields.
If the user asks any question outside these topics, politely respond:
"I can answer only AI-related questions."
You are answering a question using two sources:
1. Retrieved knowledge base context
2. External context
Use retrieved context as primary truth.
Use external context only to supplement or clarify.
Retrieved context:
{retrieved_context}
External context:
{perplexity_context}
Question:
{user_query}
"""
    final_answer = llm.complete(final_prompt)
    return final_answer
def connect_tovector_store(
    url="https://afc34f29-812e-40ea-b515-a8cc6ae9ed37.us-east4-0.gcp.cloud.qdrant.io:6333",
    collection_name="ai_tutor_knowledge",
    embed_model_name="sentence-transformers/all-MiniLM-L6-v2",
):
    """Connect to the Qdrant collection and return a LlamaIndex vector index.

    Reads the Qdrant API key from the ``Qdrant_key`` environment variable.
    The cluster URL, collection name, and embedding model are parameters
    with defaults matching the original deployment, so existing callers
    are unaffected.

    Args:
        url: Qdrant cluster endpoint.
        collection_name: Name of the Qdrant collection to attach to.
        embed_model_name: HuggingFace sentence-transformer used for query embedding.

    Returns:
        A ``VectorStoreIndex`` backed by the Qdrant collection.
    """
    qdrant_key = os.getenv("Qdrant_key")
    qdrant_client = QdrantClient(
        url=url,
        api_key=qdrant_key,
        prefer_grpc=False,
    )
    vector_store = QdrantVectorStore(
        client=qdrant_client,
        collection_name=collection_name,
    )
    embed_model = HuggingFaceEmbedding(
        model_name=embed_model_name,
    )
    index = VectorStoreIndex.from_vector_store(vector_store, embed_model=embed_model)

    # Best-effort: ensure a TEXT payload index exists on excerpt_keywords so
    # metadata filters can match it. Creating it when it already exists can
    # raise on some qdrant-client versions, and that must not break connecting.
    try:
        qdrant_client.create_payload_index(
            collection_name=collection_name,
            field_name="excerpt_keywords",
            field_schema=PayloadSchemaType.TEXT,
        )
    except Exception:
        # Index likely already present; retrieval still works without it.
        pass
    return index