Mrigank005 committed on
Commit
52c281c
·
verified ·
1 Parent(s): 28fb9f7

Update app/utils.py

Browse files
Files changed (1) hide show
  1. app/utils.py +110 -110
app/utils.py CHANGED
@@ -1,111 +1,111 @@
1
- import os
2
- from dotenv import load_dotenv
3
- from google import genai
4
- from google.genai import types
5
- from pinecone import Pinecone
6
-
7
- load_dotenv()
8
-
9
- # Initialize Pinecone
10
- pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
11
- index = pc.Index("portfolio-chat")
12
-
13
- # Initialize Google GenAI Client
14
- client = genai.Client(api_key=os.getenv("GOOGLE_API_KEY"))
15
-
16
- # Constants
17
- EMBEDDING_MODEL = "gemini-embedding-001"
18
- LLM_MODEL = "gemini-2.5-flash-lite"
19
- EMBEDDING_DIMENSION = 768
20
-
21
- def get_embedding(text: str) -> list[float]:
22
- """Generate embedding for a given text using Gemini embedding model."""
23
- try:
24
- response = client.models.embed_content(
25
- model=EMBEDDING_MODEL,
26
- contents=text,
27
- config=types.EmbedContentConfig(
28
- output_dimensionality=EMBEDDING_DIMENSION
29
- )
30
- )
31
- return response.embeddings[0].values
32
- except Exception as e:
33
- print(f"Error generating embedding: {e}")
34
- return []
35
-
36
- def get_rag_response(query: str) -> str:
37
- """
38
- RAG pipeline: embed query, retrieve context from Pinecone, generate response.
39
- """
40
- try:
41
- # Step 1: Embed the query
42
- query_embedding = get_embedding(query)
43
- if not query_embedding:
44
- return "I'm having a little trouble accessing my brain right now. Please try again!"
45
-
46
- # Step 2: Query Pinecone for top 5 matches
47
- results = index.query(
48
- vector=query_embedding,
49
- top_k=10,
50
- include_metadata=True
51
- )
52
-
53
- # Step 3: Extract context from matches
54
- context_chunks = []
55
- for match in results.matches:
56
- if match.metadata and "text" in match.metadata:
57
- context_chunks.append(match.metadata["text"])
58
-
59
- # Handle case where no context is found
60
- if not context_chunks:
61
- return "I couldn't find any specific details about that in Mrigank's portfolio, but feel free to ask about his patents, DASES, or other projects!"
62
-
63
- # Join chunks to create the context text
64
- context_text = "\n\n---\n\n".join(context_chunks)
65
-
66
- # Step 4: Construct the system prompt
67
- system_prompt = f"""You are the Advanced AI Assistant for **Mrigank Singh**, a Full Stack AI Developer and Innovator.
68
- Your goal is to impress recruiters and engineers by accurately showcasing Mrigank's technical depth, innovation, and leadership.
69
-
70
- ### CORE INSTRUCTIONS:
71
- 1. **Identity:** You are NOT Mrigank. You are his digital assistant. Refer to him as "Mrigank" or "he".
72
- 2. **Tone:** Professional, confident, and technically precise. Sound like a Software Engineer, not a marketing brochure.
73
- 3. **Formatting:** Use **Markdown** to make answers readable.
74
- - Use **bold** for key technologies or metrics.
75
- - Use `bullet points` for lists (skills, projects).
76
- - Do not output large walls of text; break it up.
77
- 4. **Source of Truth:** Answer ONLY based on the "CONTEXT" provided below. Do not make up facts.
78
- - If the answer isn't in the context, say: "I don't have that specific detail, but I can tell you about his patents, his projects or more about him."
79
-
80
- ### CRITICAL BEHAVIORS:
81
- - **Recruiters:** If asked about hiring, availability, or contact info, explicitly provide his **Email** and **LinkedIn** from the context.
82
- - **Patents:** If asked about innovation, ALWAYS mention his 3 filed patents (Terms & Conditions AI, LexiBot, MealMatch).
83
- - **Group Projects:** Credit **Konal Puri and Aviral Khanna** for DASES/UPES Career Platform. Specify Mrigank's role (Mobile App/Frontend).
84
- - **Technical Depth:** Mention specific algorithms (e.g., "Knapsack Pruning", "Isolation Forests", "Regex Chunking") to show engineering depth.
85
-
86
- ### CONTEXT FROM KNOWLEDGE BASE:
87
- {context_text}
88
- """
89
-
90
- # Step 5: Generate response using Gemini
91
- response = client.models.generate_content(
92
- model=LLM_MODEL,
93
- contents=[
94
- types.Content(
95
- role="user",
96
- parts=[
97
- types.Part.from_text(text=system_prompt + "\n\nUser Question: " + query)
98
- ]
99
- )
100
- ],
101
- config=types.GenerateContentConfig(
102
- temperature=0.7,
103
- max_output_tokens=500
104
- )
105
- )
106
-
107
- return response.text
108
-
109
- except Exception as e:
110
- print(f"Error in RAG pipeline: {e}")
111
  return "I'm encountering a temporary issue connecting to the knowledge base. Please try again in a moment."
 
1
+ import os
2
+ from dotenv import load_dotenv
3
+ from google import genai
4
+ from google.genai import types
5
+ from pinecone import Pinecone
6
+
7
+ load_dotenv()
8
+
9
+ # Initialize Pinecone
10
+ pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
11
+ index = pc.Index("portfolio-chat")
12
+
13
+ # Initialize Google GenAI Client
14
+ client = genai.Client(api_key=os.getenv("GOOGLE_API_KEY"))
15
+
16
+ # Constants
17
+ EMBEDDING_MODEL = "gemini-embedding-001"
18
+ LLM_MODEL = "gemini-2.5-flash-lite"
19
+ EMBEDDING_DIMENSION = 768
20
+
21
+ def get_embedding(text: str) -> list[float]:
22
+ """Generate embedding for a given text using Gemini embedding model."""
23
+ try:
24
+ response = client.models.embed_content(
25
+ model=EMBEDDING_MODEL,
26
+ contents=text,
27
+ config=types.EmbedContentConfig(
28
+ output_dimensionality=EMBEDDING_DIMENSION
29
+ )
30
+ )
31
+ return response.embeddings[0].values
32
+ except Exception as e:
33
+ print(f"Error generating embedding: {e}")
34
+ return []
35
+
36
+ def get_rag_response(query: str) -> str:
37
+ """
38
+ RAG pipeline: embed query, retrieve context from Pinecone, generate response.
39
+ """
40
+ try:
41
+ # Step 1: Embed the query
42
+ query_embedding = get_embedding(query)
43
+ if not query_embedding:
44
+ return "I'm having a little trouble accessing my brain right now. Please try again!"
45
+
46
+ # Step 2: Query Pinecone for top 5 matches
47
+ results = index.query(
48
+ vector=query_embedding,
49
+ top_k=10,
50
+ include_metadata=True
51
+ )
52
+
53
+ # Step 3: Extract context from matches
54
+ context_chunks = []
55
+ for match in results.matches:
56
+ if match.metadata and "text" in match.metadata:
57
+ context_chunks.append(match.metadata["text"])
58
+
59
+ # Handle case where no context is found
60
+ if not context_chunks:
61
+ return "I couldn't find any specific details about that in Mrigank's portfolio, but feel free to ask about his patents, DASES, or other projects!"
62
+
63
+ # Join chunks to create the context text
64
+ context_text = "\n\n---\n\n".join(context_chunks)
65
+
66
+ # Step 4: Construct the system prompt
67
+ system_prompt = f"""You are the Advanced AI Assistant for **Mrigank Singh**, a Full Stack AI Developer and Innovator.
68
+ Your goal is to impress recruiters and engineers by accurately showcasing Mrigank's technical depth, innovation, and leadership.
69
+
70
+ ### CORE INSTRUCTIONS:
71
+ 1. **Identity:** You are NOT Mrigank. You are his digital assistant. Refer to him as "Mrigank" or "he".
72
+ 2. **Tone:** Professional, confident, and technically precise. Sound like a Software Engineer, not a marketing brochure.
73
+ 3. **Formatting:** Use **Markdown** to make answers readable.
74
+ - Use **bold** for key technologies or metrics.
75
+ - Use `bullet points` for lists (skills, projects).
76
+ - Do not output large walls of text; break it up.
77
+ 4. **Source of Truth:** Answer ONLY based on the "CONTEXT" provided below. Do not make up facts.
78
+ - If the answer isn't in the context, say: "I don't have that specific detail, but I can tell you about his patents, his projects or more about him."
79
+
80
+ ### CRITICAL BEHAVIORS:
81
+ - **Recruiters:** If asked about hiring, availability, or contact info, explicitly provide his **Email** and **LinkedIn** from the context.
82
+ - **Patents:** If asked about innovation, ALWAYS mention his 3 filed patents (Terms & Conditions AI, LexiBot, MealMatch).
83
+ - **Group Projects:** Credit **Konal Puri and Aviral Khanna** for DASES/UPES Career Platform. Specify Mrigank's role (Mobile App/Frontend).
84
+ - **Technical Depth:** Mention specific algorithms (e.g., "Knapsack Pruning", "Isolation Forests", "Regex Chunking") to show engineering depth.
85
+
86
+ ### CONTEXT FROM KNOWLEDGE BASE:
87
+ {context_text}
88
+ """
89
+
90
+ # Step 5: Generate response using Gemini
91
+ response = client.models.generate_content(
92
+ model=LLM_MODEL,
93
+ contents=[
94
+ types.Content(
95
+ role="user",
96
+ parts=[
97
+ types.Part.from_text(text=system_prompt + "\n\nUser Question: " + query)
98
+ ]
99
+ )
100
+ ],
101
+ config=types.GenerateContentConfig(
102
+ temperature=0.7,
103
+ max_output_tokens=1024
104
+ )
105
+ )
106
+
107
+ return response.text
108
+
109
+ except Exception as e:
110
+ print(f"Error in RAG pipeline: {e}")
111
  return "I'm encountering a temporary issue connecting to the knowledge base. Please try again in a moment."