Priya-0914 committed on
Commit
19a9438
·
verified ·
1 Parent(s): db33d2e

Create generate_embeddings.py

Browse files
Files changed (1) hide show
  1. generate_embeddings.py +95 -0
generate_embeddings.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from metadata_filtering import build_filters_from_query
3
+ from llama_index.llms.openai import OpenAI
4
+ import os
5
+ from llama_index.vector_stores.qdrant import QdrantVectorStore
6
+ from qdrant_client import QdrantClient
7
+ from llama_index.core import VectorStoreIndex
8
+ from qdrant_client.models import PayloadSchemaType
9
+ from llama_index.embeddings.huggingface import HuggingFaceEmbedding
10
+ from perplexity import get_response_from_perplexity
11
+ from reranker import reranker
12
+ #from Evaluation import evaluation
13
+
14
+
15
def query_knowledge_base(index, user_query, perplexity_key, cohere_key):
    """Answer ``user_query`` using knowledge-base context plus external context.

    The function fetches external context with
    ``get_response_from_perplexity``, queries ``index`` through a query
    engine (top 25 similar nodes, post-processed by the reranker and
    restricted by filters built from the query), joins the content of the
    returned source nodes, and asks ``gpt-4o-mini`` to answer from a prompt
    that embeds both contexts and the question.

    Args:
        index: Object exposing ``as_query_engine(...)``; presumably the
            ``VectorStoreIndex`` built by ``connect_tovector_store`` --
            confirm against the caller.
        user_query: The question text. It is used for the external lookup,
            for filter construction, for retrieval, and inside the prompt.
        perplexity_key: Passed through to ``get_response_from_perplexity``.
        cohere_key: Passed through to ``reranker``.

    Returns:
        Whatever ``llm.complete`` returns for the assembled prompt.
    """
    llm = OpenAI(model="gpt-4o-mini")
    perplexity_context = get_response_from_perplexity(user_query, perplexity_key)
    cohere_rerank3 = reranker(cohere_key)
    filters = build_filters_from_query(user_query)

    query_engine = index.as_query_engine(
        similarity_top_k=25,
        node_postprocessors=[cohere_rerank3],
        filters=filters,
    )

    # NOTE(review): only ``source_nodes`` is read from this response; any
    # answer text the query engine produces is not used below. A plain
    # retriever might be sufficient here -- confirm before changing.
    response = query_engine.query(user_query)

    retrieved_context = "\n\n".join(
        node.node.get_content() for node in response.source_nodes
    )

    final_prompt = f"""

You are an AI Tutor specialized in Artificial Intelligence, Machine Learning, Deep Learning, Large Language Models, and related subfields.
You must answer ONLY questions related to AI/ML and its subfields.
If the user asks any question outside these topics, politely respond:
"I can answer only AI-related questions."


You are answering a question using two sources:
1. Retrieved knowledge base context
2. External context

Use retrieved context as primary truth.
Use external context only to supplement or clarify.

Retrieved context:
{retrieved_context}

External context:
{perplexity_context}

Question:
{user_query}
"""

    final_answer = llm.complete(final_prompt)

    return final_answer
65
+
66
+
67
+
68
def connect_tovector_store():
    """Build and return a ``VectorStoreIndex`` over the Qdrant collection.

    Reads the API key from the ``Qdrant_key`` environment variable, creates
    a ``QdrantClient`` for the hard-coded cluster URL (with
    ``prefer_grpc=False``), wraps the ``ai_tutor_knowledge`` collection in a
    ``QdrantVectorStore``, and builds the index with the
    ``sentence-transformers/all-MiniLM-L6-v2`` embedding model.

    Before returning, it requests a TEXT payload index on the
    ``excerpt_keywords`` field of the same collection (presumably so that
    metadata filters on that field can be evaluated -- confirm).

    Returns:
        The index created by ``VectorStoreIndex.from_vector_store``.
    """
    # Single source for the collection name used by both the vector store
    # and the payload-index request.
    collection = "ai_tutor_knowledge"

    client = QdrantClient(
        url="https://afc34f29-812e-40ea-b515-a8cc6ae9ed37.us-east4-0.gcp.cloud.qdrant.io:6333",
        api_key=os.getenv("Qdrant_key"),
        prefer_grpc=False,
    )
    store = QdrantVectorStore(client=client, collection_name=collection)

    embedder = HuggingFaceEmbedding(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
    )
    knowledge_index = VectorStoreIndex.from_vector_store(
        store,
        embed_model=embedder,
    )

    client.create_payload_index(
        collection_name=collection,
        field_name="excerpt_keywords",
        field_schema=PayloadSchemaType.TEXT,
    )

    return knowledge_index
94
+
95
+