README.md CHANGED
@@ -10,4 +10,19 @@ pinned: false
10
  license: mit
11
  ---
12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
10
  license: mit
11
  ---
12
 
13
+ project/
14
+ ├── app.py # Main entry point for the Streamlit app
15
+ ├── prompts.py # Contains prompt-related text (e.g., technical_interviewer_prompt)
16
+ ├── question_handler.py # Handles question generation and metadata processing
17
+ ├── embeddings_utils.py # Utilities for embeddings and similarity calculations
18
+ ├── code_executor.py # Handles code execution and test case validation
19
+ ├── requirements.txt # Python dependencies
20
+ └── utils/
21
+     ├── openai_client.py # OpenAI client setup and response generation
22
+     ├── model_loader.py # SentenceTransformer model loading
23
+     └── constants.py # Constants like file paths or default values
24
+
25
+
26
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
27
+
28
+ <!-- SECURITY: a secret API key was committed on this line and has been removed. The leaked key must be rotated immediately; load secrets from environment variables or the Space's secrets settings, never from the README. -->
app.py CHANGED
@@ -1,253 +1,34 @@
1
  import streamlit as st
2
- from openai import OpenAI
3
- import os
4
- import pandas as pd
5
- import numpy as np
6
- from sentence_transformers import SentenceTransformer
7
- from sklearn.metrics.pairwise import cosine_similarity
8
- import torch
9
- import re
10
-
11
- # Set up OpenAI client
12
- client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
13
-
14
- # Check if GPU is available
15
- device = "cuda" if torch.cuda.is_available() else "cpu"
16
- print(f"Using device: {device}")
17
-
18
- # Load metadata and embeddings (ensure these files are in your working directory or update paths)
19
- metadata_path = 'question_metadata.csv' # Update this path if needed
20
- embeddings_path = 'question_dataset_embeddings.npy' # Update this path if needed
21
 
 
22
  metadata = pd.read_csv(metadata_path)
23
  embeddings = np.load(embeddings_path)
24
 
25
- # Load the SentenceTransformer model
26
- model = SentenceTransformer("all-MiniLM-L6-v2").to(device)
27
-
28
- # Load prompts from files
29
- with open("technical_interviewer_prompt.txt", "r") as file:
30
- technical_interviewer_prompt = file.read()
31
-
32
- with open("question_generation_prompt.txt", "r") as file:
33
- question_generation_prompt = file.read()
34
-
35
  st.title("Real-World Programming Question Mock Interview")
36
 
37
- # Initialize session state variables
38
- if "messages" not in st.session_state:
39
- st.session_state.messages = []
40
-
41
- if "follow_up_mode" not in st.session_state:
42
- st.session_state.follow_up_mode = False # Tracks whether we're in follow-up mode
43
-
44
- if "generated_question" not in st.session_state:
45
- st.session_state.generated_question = None # Stores the generated question for persistence
46
-
47
- if "code_template" not in st.session_state:
48
- st.session_state.code_template = "" # Stores the code template
49
-
50
- if "sample_test_case" not in st.session_state:
51
- st.session_state.sample_test_case = "" # Stores the sample test case
52
-
53
- if "expected_output" not in st.session_state:
54
- st.session_state.expected_output = "" # Stores the expected output
55
-
56
- if "debug_logs" not in st.session_state:
57
- st.session_state.debug_logs = None # Stores debug logs for toggling
58
-
59
- # Function to find the top 1 most similar question based on user input
60
- def find_top_question(query):
61
- # Generate embedding for the query
62
- query_embedding = model.encode(query, convert_to_tensor=True, device=device).cpu().numpy()
63
-
64
- # Reshape query_embedding to ensure it is a 2D array
65
- query_embedding = query_embedding.reshape(1, -1) # Reshape to (1, n_features)
66
-
67
- # Compute cosine similarity between query embedding and dataset embeddings
68
- similarities = cosine_similarity(query_embedding, embeddings).flatten() # Flatten to get a 1D array of similarities
69
-
70
- # Get the index of the most similar result (top 1)
71
- top_index = similarities.argsort()[-1] # Index of highest similarity
72
-
73
- # Retrieve metadata for the top result
74
- top_result = metadata.iloc[top_index].copy()
75
- top_result['similarity_score'] = similarities[top_index]
76
-
77
- return top_result
78
-
79
- # Function to generate response using OpenAI API with debugging logs
80
- def generate_response(messages):
81
- # For debug logs, store only the follow-up conversation history
82
- st.session_state.debug_logs = st.session_state.messages # Update debug logs with current conversation
83
-
84
- response = client.chat.completions.create(
85
- model="o1-mini",
86
- messages=messages,
87
- )
88
-
89
- return response.choices[0].message.content
90
-
91
- # Function to extract code template and sample test case from the generated question
92
- def extract_code_and_test_case(generated_question):
93
- code_template = ""
94
- sample_test_case = ""
95
- expected_output = ""
96
-
97
- # Extract code template
98
- code_match = re.search(r'```python(.*?)```', generated_question, re.DOTALL)
99
- if code_match:
100
- code_template = code_match.group(1).strip()
101
- else:
102
- # Default code template if none is found
103
- code_template = "# Write your code here\n"
104
-
105
- # Extract sample test case and expected output
106
- test_case_match = re.search(r'Sample Input:\s*(.*?)\n', generated_question, re.DOTALL)
107
- expected_output_match = re.search(r'Expected Output:\s*(.*?)\n', generated_question, re.DOTALL)
108
- if test_case_match and expected_output_match:
109
- sample_test_case = test_case_match.group(1).strip()
110
- expected_output = expected_output_match.group(1).strip()
111
- else:
112
- sample_test_case = ""
113
- expected_output = ""
114
-
115
- return code_template, sample_test_case, expected_output
116
-
117
- # Move the input form to the sidebar to make it always visible and more compact
118
  with st.sidebar.form(key="input_form"):
119
- st.markdown("## Generate a New Question")
120
- company = st.text_input("Company", value="Google") # Default value: Google
121
- difficulty = st.selectbox("Difficulty", ["Easy", "Medium", "Hard"], index=1) # Default: Medium
122
- topic = st.text_input("Topic", value="Binary Search") # Default: Binary Search
123
  generate_button = st.form_submit_button(label="Generate")
124
 
125
  if generate_button:
126
- # Clear session state and start fresh with follow-up mode disabled
127
- st.session_state.messages = []
128
- st.session_state.follow_up_mode = False
129
-
130
- # Create a query from user inputs and find the most relevant question
131
  query = f"{company} {difficulty} {topic}"
132
- top_question = find_top_question(query)
133
-
134
- # Prepare a detailed prompt for GPT using the top question's details
135
- detailed_prompt = (
136
- f"Transform this LeetCode question into a real-world interview scenario.\n\n"
137
- f"**Company**: {top_question['company']}\n"
138
- f"**Question Name**: {top_question['questionName']}\n"
139
- f"**Difficulty Level**: {top_question['difficulty level']}\n"
140
- f"**Tags**: {top_question['Tags']}\n"
141
- f"**Content**: {top_question['Content']}\n"
142
- f"\nPlease create a real-world interview question based on this information. "
143
- f"Include the following sections:\n\n"
144
- f"- Problem Description\n"
145
- f"- Code Template (in a Python code block)\n"
146
- f"- Sample Input and Expected Output (clearly separated)\n"
147
- )
148
-
149
- # Generate response using OpenAI API with detailed prompt and debugging logs
150
- response = generate_response([{"role": "user", "content": detailed_prompt}]) # Question generation prompt excluded here
151
-
152
- # Store generated question in session state for persistence in sidebar and follow-up conversation state
153
  st.session_state.generated_question = response
154
 
155
- # Extract code template and sample test case
156
- code_template, sample_test_case, expected_output = extract_code_and_test_case(response)
157
- st.session_state.code_template = code_template
158
- st.session_state.sample_test_case = sample_test_case
159
- st.session_state.expected_output = expected_output
160
-
161
- # Enable follow-up mode after generating the initial question
162
- st.session_state.follow_up_mode = True
163
-
164
- # Display chat messages from history on app rerun (for subsequent conversation)
165
- for message in st.session_state.messages:
166
- with st.chat_message(message["role"]):
167
- st.markdown(message["content"])
168
-
169
- # Chatbox for subsequent conversations with assistant (follow-up mode)
170
- if st.session_state.follow_up_mode:
171
- if user_input := st.chat_input("Continue your conversation or ask follow-up questions here:"):
172
- # Display user message in chat message container and add to session history
173
- with st.chat_message("user"):
174
- st.markdown(user_input)
175
-
176
- st.session_state.messages.append({"role": "user", "content": user_input})
177
-
178
- # Prepare messages to send to the assistant
179
- # Include the technical interviewer prompt and generated question, but do not display them
180
- # Add an instruction for the assistant to reply as a real-world interviewer would
181
- assistant_instruction = (
182
- "As a real-world interviewer, please reply to the candidate's follow-up questions "
183
- "specific to the generated interview question, to the point, and in a natural, human-sounding way."
184
- )
185
-
186
- messages_to_send = [
187
- {"role": "user", "content": technical_interviewer_prompt},
188
- {"role": "assistant", "content": st.session_state.generated_question},
189
- {"role": "user", "content": assistant_instruction}
190
- ] + st.session_state.messages
191
-
192
- assistant_response = generate_response(messages_to_send)
193
-
194
- with st.chat_message("assistant"):
195
- st.markdown(assistant_response)
196
-
197
- st.session_state.messages.append({"role": "assistant", "content": assistant_response})
198
-
199
- st.sidebar.markdown("---")
200
- st.sidebar.markdown("## Generated Question")
201
- if st.session_state.generated_question:
202
- st.sidebar.markdown(st.session_state.generated_question)
203
- else:
204
- st.sidebar.markdown("_No question generated yet._")
205
-
206
- st.sidebar.markdown("---")
207
  st.sidebar.markdown("## Python Code Interpreter")
208
-
209
- # Pre-fill code interpreter with code template after question generation
210
- if st.session_state.code_template:
211
- code_input = st.sidebar.text_area("Write your Python code here:", value=st.session_state.code_template, height=300)
212
- else:
213
- code_input = st.sidebar.text_area("Write your Python code here:", height=300)
214
-
215
  if st.sidebar.button("Run Code"):
216
- try:
217
- # Prepare the code for execution
218
- exec_globals = {}
219
- # Create a function wrapper to execute the user's code
220
- exec(f"def user_solution():\n{code_input}", exec_globals)
221
- user_solution = exec_globals.get('user_solution', None)
222
-
223
- # Prepare sample test case execution
224
- if st.session_state.sample_test_case:
225
- # Assume the sample test case is in the format of arguments to the function
226
- test_case = st.session_state.sample_test_case
227
- # Evaluate the test case safely
228
- test_args = eval(test_case)
229
- if not isinstance(test_args, tuple):
230
- test_args = (test_args,)
231
- # Capture the output
232
- returned_output = user_solution(*test_args)
233
- else:
234
- returned_output = user_solution()
235
-
236
- # Display the expected output and returned output
237
- st.sidebar.markdown("### Sample Test Case Result:")
238
- st.sidebar.markdown(f"**Sample Input:** {st.session_state.sample_test_case}")
239
- st.sidebar.markdown(f"**Expected Output:** {st.session_state.expected_output}")
240
- st.sidebar.markdown(f"**Your Output:** {returned_output}")
241
-
242
- # Compare outputs
243
- if str(returned_output) == st.session_state.expected_output:
244
- st.sidebar.success("Your output matches the expected output!")
245
- else:
246
- st.sidebar.error("Your output does not match the expected output.")
247
- except Exception as e:
248
- st.sidebar.error(f"Error: {e}")
249
 
250
- # Right sidebar toggleable debug logs and code interpreter section
251
- with st.expander("Debug Logs (Toggle On/Off)", expanded=False):
252
- if st.session_state.debug_logs:
253
- st.write(st.session_state.debug_logs)
 
import streamlit as st

import numpy as np
import pandas as pd

from code_executor import execute_code
from question_handler import find_top_question, generate_detailed_prompt
from utils.constants import embeddings_path, metadata_path
from utils.openai_client import generate_response

# Load the question metadata and the pre-computed embeddings once at startup.
# Bug fix: the refactored version used pd/np without importing pandas/numpy,
# so the app crashed with NameError on the first run.
metadata = pd.read_csv(metadata_path)
embeddings = np.load(embeddings_path)

st.title("Real-World Programming Question Mock Interview")

# Bug fix: initialise every session key that is read elsewhere
# (code_executor reads sample_test_case / expected_output) so the first
# rerun cannot fail with a missing-attribute error.
for _key, _default in (
    ("generated_question", None),
    ("sample_test_case", ""),
    ("expected_output", ""),
):
    if _key not in st.session_state:
        st.session_state[_key] = _default

# Sidebar form for generating questions.
with st.sidebar.form(key="input_form"):
    company = st.text_input("Company", value="Google")
    difficulty = st.selectbox("Difficulty", ["Easy", "Medium", "Hard"], index=1)
    topic = st.text_input("Topic", value="Binary Search")
    generate_button = st.form_submit_button(label="Generate")

if generate_button:
    # Build a free-text query, retrieve the closest dataset question, and
    # ask the LLM to turn it into a real-world interview scenario.
    query = f"{company} {difficulty} {topic}"
    top_question = find_top_question(query, metadata, embeddings)
    detailed_prompt = generate_detailed_prompt(top_question)
    response = generate_response(detailed_prompt)
    st.session_state.generated_question = response

# Code execution section in the sidebar.
st.sidebar.markdown("## Python Code Interpreter")
code_input = st.sidebar.text_area("Write your Python code here:", height=300)
if st.sidebar.button("Run Code"):
    execute_code(code_input)

# Display generated questions and follow-up chat logic here...
code_executor.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import textwrap

import streamlit as st


def execute_code(code_input):
    """Execute user-submitted Python code and compare its result against the
    expected output stored in Streamlit session state.

    SECURITY: exec()/eval() below run arbitrary user code in-process with the
    app's privileges. That is the point of an interactive code-interpreter
    feature, but it must never be exposed to untrusted multi-tenant use.
    """
    try:
        exec_globals = {}
        # Bug fix: the user's code must be indented to form a valid body for
        # the wrapper function; the original f-string pasted it at column 0,
        # which always raised IndentationError.
        body = textwrap.indent(code_input.strip("\n") or "pass", "    ")
        # Bug fix: accept *args so the sample-test-case branch below can call
        # the wrapper with arguments; the original zero-arg wrapper raised
        # TypeError whenever a sample test case existed.
        exec(f"def user_solution(*args):\n{body}", exec_globals)
        user_solution = exec_globals.get('user_solution')
        if user_solution is None:
            st.sidebar.error("Could not build an executable solution from your code.")
            return

        sample_test_case = st.session_state.get("sample_test_case", "")
        if sample_test_case:
            # eval() of the stored test-case string — same trust caveat as above.
            test_args = eval(sample_test_case)
            if not isinstance(test_args, tuple):
                test_args = (test_args,)
            returned_output = user_solution(*test_args)
        else:
            returned_output = user_solution()

        st.sidebar.markdown(f"**Your Output:** {returned_output}")
        if str(returned_output) == st.session_state.get("expected_output", ""):
            st.sidebar.success("Your output matches the expected output!")
        else:
            st.sidebar.error("Your output does not match the expected output.")
    except Exception as e:
        # Surface any user-code failure in the sidebar instead of crashing the app.
        st.sidebar.error(f"Error: {e}")
elevenLabs.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import os

from elevenlabs import ElevenLabs

# Bug fix / security: read the API key from the environment instead of
# committing a hard-coded placeholder (or a real key) to the repository.
# KeyError here gives an immediate, clear failure when the key is missing.
client = ElevenLabs(api_key=os.environ["ELEVENLABS_API_KEY"])

# Generate audio from text.
audio = client.generate(
    text="Welcome to your mock interview. Let's start with your first question.",
    voice="Rachel",  # a pre-existing voice; a custom voice name also works
    model="eleven_monolingual_v1",  # English-optimized model
)

# Save the generated audio to a file.
# NOTE(review): depending on SDK version, generate() may return an iterator
# of byte chunks rather than bytes — confirm; if so, join chunks before writing.
with open("output.mp3", "wb") as f:
    f.write(audio)
embeddings_utils.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
from sentence_transformers import SentenceTransformer

# Name of the shared sentence-embedding model used across the app.
_MODEL_NAME = "all-MiniLM-L6-v2"


def load_model(device="cpu"):
    """Instantiate the SentenceTransformer encoder and move it to *device*."""
    encoder = SentenceTransformer(_MODEL_NAME)
    return encoder.to(device)


def compute_query_embedding(model, query):
    """Encode *query* with *model* and return the embedding as a CPU numpy array."""
    tensor = model.encode(query, convert_to_tensor=True)
    return tensor.cpu().numpy()
prompts.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
from pathlib import Path

# Prompt texts are loaded once at import time; callers import the two
# module-level strings below.
# Bug fix: read with an explicit UTF-8 encoding so the prompts do not
# depend on the platform's default locale encoding.
technical_interviewer_prompt = Path("technical_interviewer_prompt.txt").read_text(encoding="utf-8")
question_generation_prompt = Path("question_generation_prompt.txt").read_text(encoding="utf-8")
question_handler.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ from sklearn.metrics.pairwise import cosine_similarity
3
+
def find_top_question(query, metadata, embeddings, model=None):
    """Return the metadata row of the dataset question most similar to *query*.

    Parameters:
        query: free-text search string (e.g. built from company/difficulty/topic).
        metadata: pandas DataFrame with one row per question.
        embeddings: 2-D array of question embeddings, row-aligned with *metadata*.
        model: optional sentence encoder exposing ``encode``; when omitted, the
            shared model is loaded lazily via ``embeddings_utils.load_model``.

    Returns:
        A copy of the best-matching row with an added ``similarity_score`` field.

    Bug fix: the original referenced a global ``model`` that is never defined
    or imported in this module, so every call raised NameError. The encoder is
    now an explicit, optional (backward-compatible) parameter.
    """
    import numpy as np  # local import: this module does not import numpy at top level

    if model is None:
        from embeddings_utils import load_model
        model = load_model()

    query_embedding = model.encode(query, convert_to_tensor=True).cpu().numpy().reshape(1, -1)

    # Cosine similarity computed directly with numpy; matches sklearn's
    # cosine_similarity for these inputs, with all-zero rows mapped to 0.
    matrix = np.asarray(embeddings, dtype=float)
    q = query_embedding.ravel()
    denom = np.linalg.norm(matrix, axis=1) * np.linalg.norm(q)
    with np.errstate(divide="ignore", invalid="ignore"):
        similarities = np.where(denom > 0, matrix @ q / denom, 0.0)

    # argsort()[-1] (not argmax) preserves the original tie-breaking:
    # among equal scores the highest index wins.
    top_index = similarities.argsort()[-1]
    top_result = metadata.iloc[top_index].copy()
    top_result['similarity_score'] = similarities[top_index]
    return top_result
def generate_detailed_prompt(question_metadata):
    """Build the LLM prompt that turns a dataset question into a real-world
    interview scenario.

    *question_metadata* must expose the keys ``company``, ``questionName``,
    ``difficulty level``, ``Tags`` and ``Content`` (a pandas Series or dict).
    """
    # Render each metadata field as a bold-labelled markdown line.
    fields = [
        ("Company", question_metadata['company']),
        ("Question Name", question_metadata['questionName']),
        ("Difficulty Level", question_metadata['difficulty level']),
        ("Tags", question_metadata['Tags']),
        ("Content", question_metadata['Content']),
    ]
    field_lines = "".join(f"**{label}**: {value}\n" for label, value in fields)
    return (
        "Transform this LeetCode question into a real-world interview scenario.\n\n"
        + field_lines
        + "\nPlease create a real-world interview question based on this information. "
          "Include sections for problem description, code template, sample input, and expected output."
    )
requirements.txt CHANGED
@@ -5,4 +5,5 @@ numpy
5
  pandas
6
  sentence_transformers
7
  scikit-learn
8
- requests
 
 
5
  pandas
6
  sentence_transformers
7
  scikit-learn
8
+ requests
9
+ elevenlabs
utils/constants.py ADDED
@@ -0,0 +1,2 @@
 
 
 
# Central path constants shared across the app.

# CSV of per-question metadata (company, name, difficulty level, tags, content).
metadata_path = 'question_metadata.csv'
# Pre-computed sentence-embedding matrix, row-aligned with the CSV above.
embeddings_path = 'question_dataset_embeddings.npy'
utils/openai_client.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
import os

from openai import OpenAI


def generate_response(prompt):
    """Send *prompt* as a single user message to the chat model and return
    the assistant's reply text.

    Bug fix: ``os`` was used without being imported, so every call raised
    NameError before reaching the API.
    """
    # A fresh client per call is cheap and keeps this module stateless;
    # the key comes from the OPENAI_API_KEY environment variable.
    client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}],
    )
    return response.choices[0].message.content