nsgupta1 commited on
Commit
13a270c
·
verified ·
1 Parent(s): 839dd0f

Upload 7 files

Browse files
README.md CHANGED
@@ -1,12 +1,13 @@
1
  ---
2
- title: MockInterviewBot
3
- emoji: 🏃
4
- colorFrom: yellow
5
- colorTo: purple
6
  sdk: streamlit
7
  sdk_version: 1.40.1
8
  app_file: app.py
9
  pinned: false
 
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Codingprep
3
+ emoji: 💻
4
+ colorFrom: purple
5
+ colorTo: yellow
6
  sdk: streamlit
7
  sdk_version: 1.40.1
8
  app_file: app.py
9
  pinned: false
10
+ license: mit
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,253 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from openai import OpenAI
3
+ import os
4
+ import pandas as pd
5
+ import numpy as np
6
+ from sentence_transformers import SentenceTransformer
7
+ from sklearn.metrics.pairwise import cosine_similarity
8
+ import torch
9
+ import re
10
+
11
# Set up OpenAI client.
# NOTE(review): assumes OPENAI_API_KEY is set in the environment; if it is
# missing, os.getenv returns None and API calls will fail later — confirm
# the Space has the secret configured.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# Check if GPU is available; used both for the embedding model and query encoding.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# Load metadata and embeddings (ensure these files are in your working directory or update paths).
# The CSV rows and the .npy rows are assumed to be index-aligned — TODO confirm.
metadata_path = 'question_metadata.csv'  # Update this path if needed
embeddings_path = 'question_dataset_embeddings.npy'  # Update this path if needed

metadata = pd.read_csv(metadata_path)
embeddings = np.load(embeddings_path)

# Load the SentenceTransformer model used to embed user queries for similarity search.
model = SentenceTransformer("all-MiniLM-L6-v2").to(device)

# Load prompts from files.
# technical_interviewer_prompt is injected into follow-up conversations below;
# question_generation_prompt is loaded but not referenced in this file's
# visible code (the generation step builds its own detailed prompt).
with open("technical_interviewer_prompt.txt", "r") as file:
    technical_interviewer_prompt = file.read()

with open("question_generation_prompt.txt", "r") as file:
    question_generation_prompt = file.read()

st.title("Real-World Programming Question Mock Interview")

# Initialize session state variables.
# Streamlit re-runs this script top-to-bottom on every interaction, so each
# key must be created exactly once before any later code reads it.
if "messages" not in st.session_state:
    st.session_state.messages = []  # Follow-up chat history (role/content dicts)

if "follow_up_mode" not in st.session_state:
    st.session_state.follow_up_mode = False  # Tracks whether we're in follow-up mode

if "generated_question" not in st.session_state:
    st.session_state.generated_question = None  # Stores the generated question for persistence

if "code_template" not in st.session_state:
    st.session_state.code_template = ""  # Stores the code template

if "sample_test_case" not in st.session_state:
    st.session_state.sample_test_case = ""  # Stores the sample test case

if "expected_output" not in st.session_state:
    st.session_state.expected_output = ""  # Stores the expected output

if "debug_logs" not in st.session_state:
    st.session_state.debug_logs = None  # Stores debug logs for toggling
58
+
59
# Function to find the top 1 most similar question based on user input
def find_top_question(query):
    """Return the metadata row most similar to *query*, with a similarity score.

    Encodes the query with the module-level SentenceTransformer, compares it
    against the precomputed dataset embeddings by cosine similarity, and
    returns a copy of the best-matching metadata row with an added
    ``similarity_score`` field.
    """
    # Generate embedding for the query and move it to CPU as a numpy array.
    query_embedding = model.encode(query, convert_to_tensor=True, device=device).cpu().numpy()

    # cosine_similarity expects 2D input: (1, n_features).
    query_embedding = query_embedding.reshape(1, -1)

    # Cosine similarity against every dataset embedding, flattened to 1D.
    similarities = cosine_similarity(query_embedding, embeddings).flatten()

    # FIX: use argmax (O(n)) instead of the original argsort()[-1], which
    # sorted the entire similarity array just to read its maximum.
    top_index = int(similarities.argmax())

    # Copy so adding similarity_score doesn't mutate the shared metadata frame.
    top_result = metadata.iloc[top_index].copy()
    top_result['similarity_score'] = similarities[top_index]

    return top_result
78
+
79
# Helper that forwards a message list to the OpenAI chat API.
def generate_response(messages):
    """Send *messages* to the chat completions endpoint and return the reply text.

    Side effect: snapshots the current follow-up conversation history into
    ``st.session_state.debug_logs`` so the debug panel can display it.
    """
    # Keep the debug panel in sync with the conversation so far.
    st.session_state.debug_logs = st.session_state.messages

    completion = client.chat.completions.create(
        model="o1-mini",
        messages=messages,
    )
    return completion.choices[0].message.content
90
+
91
# Function to extract code template and sample test case from the generated question
def extract_code_and_test_case(generated_question):
    """Extract the Python code template, sample input, and expected output.

    Parameters
    ----------
    generated_question : str
        The model-generated question text. Expected to contain a fenced
        ```python ...``` block plus ``Sample Input:`` / ``Expected Output:``
        lines, but all three parts are optional.

    Returns
    -------
    tuple[str, str, str]
        ``(code_template, sample_test_case, expected_output)``. The code
        template falls back to a stub comment when no fenced block is found;
        the test-case pair is returned only when BOTH labels are present
        (all-or-nothing, matching the runner's expectations), otherwise both
        are empty strings.
    """
    # Code template lives in a fenced ```python block; DOTALL lets it span lines.
    code_match = re.search(r'```python(.*?)```', generated_question, re.DOTALL)
    # Default stub when no fenced block is found.
    code_template = code_match.group(1).strip() if code_match else "# Write your code here\n"

    sample_test_case = ""
    expected_output = ""

    # FIX: the original patterns required a trailing '\n' after the value, so
    # a label on the final line of the text (no trailing newline) silently
    # failed to match. '(?:\n|$)' also accepts end-of-string. '\s*' still
    # skips a newline after the label, so values on the following line keep
    # matching exactly as before.
    test_case_match = re.search(r'Sample Input:\s*(.*?)(?:\n|$)', generated_question)
    expected_output_match = re.search(r'Expected Output:\s*(.*?)(?:\n|$)', generated_question)
    if test_case_match and expected_output_match:
        sample_test_case = test_case_match.group(1).strip()
        expected_output = expected_output_match.group(1).strip()

    return code_template, sample_test_case, expected_output
116
+
117
# Move the input form to the sidebar to make it always visible and more compact.
# Streamlit forms batch the widget values and submit them together on button press.
with st.sidebar.form(key="input_form"):
    st.markdown("## Generate a New Question")
    company = st.text_input("Company", value="Google")  # Default value: Google
    difficulty = st.selectbox("Difficulty", ["Easy", "Medium", "Hard"], index=1)  # Default: Medium
    topic = st.text_input("Topic", value="Binary Search")  # Default: Binary Search
    generate_button = st.form_submit_button(label="Generate")

if generate_button:
    # Clear session state and start fresh with follow-up mode disabled.
    st.session_state.messages = []
    st.session_state.follow_up_mode = False

    # Create a query from user inputs and find the most relevant question
    # via embedding similarity search over the local dataset.
    query = f"{company} {difficulty} {topic}"
    top_question = find_top_question(query)

    # Prepare a detailed prompt for GPT using the top question's details.
    # NOTE(review): column names ('company', 'questionName', 'difficulty level',
    # 'Tags', 'Content') must match question_metadata.csv exactly — a missing
    # column raises KeyError here; confirm against the CSV header.
    detailed_prompt = (
        f"Transform this LeetCode question into a real-world interview scenario.\n\n"
        f"**Company**: {top_question['company']}\n"
        f"**Question Name**: {top_question['questionName']}\n"
        f"**Difficulty Level**: {top_question['difficulty level']}\n"
        f"**Tags**: {top_question['Tags']}\n"
        f"**Content**: {top_question['Content']}\n"
        f"\nPlease create a real-world interview question based on this information. "
        f"Include the following sections:\n\n"
        f"- Problem Description\n"
        f"- Code Template (in a Python code block)\n"
        f"- Sample Input and Expected Output (clearly separated)\n"
    )

    # Generate response using OpenAI API with detailed prompt and debugging logs.
    response = generate_response([{"role": "user", "content": detailed_prompt}])  # Question generation prompt excluded here

    # Store generated question in session state for persistence in sidebar and follow-up conversation state.
    st.session_state.generated_question = response

    # Extract code template and sample test case so the code interpreter can be pre-filled.
    code_template, sample_test_case, expected_output = extract_code_and_test_case(response)
    st.session_state.code_template = code_template
    st.session_state.sample_test_case = sample_test_case
    st.session_state.expected_output = expected_output

    # Enable follow-up mode after generating the initial question.
    st.session_state.follow_up_mode = True
163
+
164
# Display chat messages from history on app rerun (for subsequent conversation).
# Streamlit re-executes the script on every interaction, so the full history
# must be re-rendered each time from session state.
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# Chatbox for subsequent conversations with assistant (follow-up mode).
if st.session_state.follow_up_mode:
    if user_input := st.chat_input("Continue your conversation or ask follow-up questions here:"):
        # Display user message in chat message container and add to session history.
        with st.chat_message("user"):
            st.markdown(user_input)

        st.session_state.messages.append({"role": "user", "content": user_input})

        # Prepare messages to send to the assistant.
        # Include the technical interviewer prompt and generated question, but do not display them.
        # Add an instruction for the assistant to reply as a real-world interviewer would.
        assistant_instruction = (
            "As a real-world interviewer, please reply to the candidate's follow-up questions "
            "specific to the generated interview question, to the point, and in a natural, human-sounding way."
        )

        # Hidden context (system-style prompt + the question) is prepended on
        # every turn; only st.session_state.messages is shown to the user.
        messages_to_send = [
            {"role": "user", "content": technical_interviewer_prompt},
            {"role": "assistant", "content": st.session_state.generated_question},
            {"role": "user", "content": assistant_instruction}
        ] + st.session_state.messages

        assistant_response = generate_response(messages_to_send)

        with st.chat_message("assistant"):
            st.markdown(assistant_response)

        st.session_state.messages.append({"role": "assistant", "content": assistant_response})
198
+
199
st.sidebar.markdown("---")
st.sidebar.markdown("## Generated Question")
if st.session_state.generated_question:
    st.sidebar.markdown(st.session_state.generated_question)
else:
    st.sidebar.markdown("_No question generated yet._")

st.sidebar.markdown("---")
st.sidebar.markdown("## Python Code Interpreter")

# Pre-fill code interpreter with code template after question generation.
if st.session_state.code_template:
    code_input = st.sidebar.text_area("Write your Python code here:", value=st.session_state.code_template, height=300)
else:
    code_input = st.sidebar.text_area("Write your Python code here:", height=300)

if st.sidebar.button("Run Code"):
    try:
        import textwrap

        # Prepare the code for execution.
        exec_globals = {}
        # BUG FIX: the original built f"def user_solution():\n{code_input}",
        # embedding the user's code UNINDENTED inside the function def — that
        # raises IndentationError for any non-empty submission. Indent each
        # line one level so it forms a valid function body.
        wrapped_code = "def user_solution():\n" + textwrap.indent(code_input, "    ")
        exec(wrapped_code, exec_globals)
        user_solution = exec_globals.get('user_solution', None)

        # Prepare sample test case execution.
        if st.session_state.sample_test_case:
            # Assume the sample test case is in the format of arguments to the function.
            test_case = st.session_state.sample_test_case
            # SECURITY NOTE: eval on model-generated text is unsafe in
            # general; tolerated here only because the sandbox runs the
            # user's own session. Prefer ast.literal_eval if inputs are
            # guaranteed to be literals.
            test_args = eval(test_case)
            if not isinstance(test_args, tuple):
                test_args = (test_args,)
            # Capture the output.
            returned_output = user_solution(*test_args)
        else:
            returned_output = user_solution()

        # Display the expected output and the user's returned output side by side.
        st.sidebar.markdown("### Sample Test Case Result:")
        st.sidebar.markdown(f"**Sample Input:** {st.session_state.sample_test_case}")
        st.sidebar.markdown(f"**Expected Output:** {st.session_state.expected_output}")
        st.sidebar.markdown(f"**Your Output:** {returned_output}")

        # Compare outputs (string comparison, matching how they were extracted).
        if str(returned_output) == st.session_state.expected_output:
            st.sidebar.success("Your output matches the expected output!")
        else:
            st.sidebar.error("Your output does not match the expected output.")
    except Exception as e:
        st.sidebar.error(f"Error: {e}")

# Toggleable debug logs section (shows the follow-up conversation history).
with st.expander("Debug Logs (Toggle On/Off)", expanded=False):
    if st.session_state.debug_logs:
        st.write(st.session_state.debug_logs)
question_dataset_embeddings.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0e8003615af2eb6c5fa4657d5f06c1b4ed26e152173f7c9ef928a9a86d7f170
3
+ size 132
question_generation_prompt.txt ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ "You are an expert technical interviewer tasked with transforming algorithmic problems into engaging real-world scenarios. Please help me generate interview questions that test the same underlying concepts as a given LeetCode problem but frame them in practical, real-world contexts. Your task is to analyze, identify, and generate questions as follows:
2
+
3
+ ANALYZE the given LeetCode question for:
4
+ Core algorithmic concepts
5
+ Data structures used
6
+ Pattern recognition
7
+ Time and space complexity requirements
8
+ Edge cases and constraints
9
+ IDENTIFY relevant real-world domains where similar problems occur, such as:
10
+ System design scenarios
11
+ Business operations
12
+ Technology applications
13
+ Social media features
14
+ Financial systems
15
+ Gaming mechanics
16
+ E-commerce operations
17
+ Content delivery systems
18
+ Resource management
19
+ GENERATE the interview question with this structure:
20
+ CONTEXT: Provide a brief background setting up the real-world scenario.
21
+ PROBLEM STATEMENT: Write a clear description of the challenge to be solved.
22
+ REQUIREMENTS: Specify functional requirements, performance constraints, and scale considerations.
23
+ EXAMPLE: Include sample input/output and edge cases.
24
+ FOLLOW-UP QUESTIONS: Add questions exploring scalability, optimizations, and trade-offs.
25
+ Guidelines for Different Problem Types:
26
+ Array/String Problems:
27
+ Transform into scenarios like log processing, user activity tracking, content recommendation systems, or text processing applications.
28
+ Tree/Graph Problems:
29
+ Use cases like social network connections, organization hierarchies, network routing problems, file system organizations, or dependency management.
30
+ Dynamic Programming:
31
+ Frame as resource optimization problems, cost minimization scenarios, planning and scheduling systems, or risk management strategies.
32
+ Hash Table/Set Problems:
33
+ Examples include caching systems, duplicate detection, feature tracking, or user session management.
34
+ Stack/Queue Problems:
35
+ Scenarios such as transaction processing, task scheduling, message queuing systems, or undo/redo functionality.
36
+ Example Format:
37
+ INPUT:
38
+ LeetCode Question:
39
+
40
+ [Title]
41
+ [Description]
42
+ [Constraints]
43
+ OUTPUT:
44
+ Real-World Interview Question:
45
+
46
+ Context
47
+ Problem Statement
48
+ Requirements
49
+ Example
50
+ Follow-up Questions
51
+ Special Instructions:
52
+ Maintain the core algorithmic complexity and test the same concepts as the original.
53
+ Ensure the scenario is realistic and mirrors production constraints.
54
+ Include system design considerations where relevant.
55
+ Encourage discussion about scalability, optimization, and trade-offs.
56
+ Provide short, concise responses to follow-up questions and guide the user step-by-step instead of giving complete answers unless explicitly asked."
question_metadata.csv ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ openai
3
+ torch
4
+ numpy
5
+ pandas
6
+ sentence_transformers
7
+ scikit-learn
8
+ requests
technical_interviewer_prompt.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ You are a senior technical interviewer for a FAANG (Facebook/Meta, Amazon, Apple, Netflix, Google) company conducting a technical interview.
2
+ Your role is to answer any follow-up questions concisely and to the point and provide specific hints when asked for by the user.
3
+
4
+ Hint Providing Strategy:
5
+ - First hint should be conceptual, not code-specific
6
+ - Subsequent hints progressively reveal more detail
7
+ - Hints are meant to unblock thinking, not solve the problem
8
+ - If stuck, ask probing questions to help candidate self-discover
9
+ - Only provide full solution if explicitly requested or after multiple failed attempts
10
+
11
+
12
+ Technical Depth:
13
+ - Focus on data structures and algorithms
14
+ - Prefer solutions with optimal time/space complexity
15
+ - Encourage explanations of approach before coding
16
+