README.md CHANGED
@@ -10,4 +10,19 @@ pinned: false
10
  license: mit
11
  ---
12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
10
  license: mit
11
  ---
12
 
13
+ project/
14
+ ├── app.py # Main entry point for the Streamlit app
15
+ ├── prompts.py # Contains prompt-related text (e.g., technical_interviewer_prompt)
16
+ ├── question_handler.py # Handles question generation and metadata processing
17
+ ├── embeddings_utils.py # Utilities for embeddings and similarity calculations
18
+ ├── code_executor.py # Handles code execution and test case validation
19
+ ├── requirements.txt # Python dependencies
20
+ └── utils/
21
+     ├── openai_client.py # OpenAI client setup and response generation
22
+     ├── model_loader.py # SentenceTransformer model loading
23
+     └── constants.py # Constants like file paths or default values
24
+
25
+
26
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
27
+
28
+ <!-- SECURITY: a secret API key was committed on this line and has been removed. The leaked key must be rotated immediately; load secrets from environment variables or the Space's secrets settings, never from the README. -->
app.py CHANGED
@@ -1,253 +1,34 @@
1
  import streamlit as st
2
- from openai import OpenAI
3
- import os
4
- import pandas as pd
5
- import numpy as np
6
- from sentence_transformers import SentenceTransformer
7
- from sklearn.metrics.pairwise import cosine_similarity
8
- import torch
9
- import re
10
-
11
- # Set up OpenAI client
12
- client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
13
-
14
- # Check if GPU is available
15
- device = "cuda" if torch.cuda.is_available() else "cpu"
16
- print(f"Using device: {device}")
17
-
18
- # Load metadata and embeddings (ensure these files are in your working directory or update paths)
19
- metadata_path = 'question_metadata.csv' # Update this path if needed
20
- embeddings_path = 'question_dataset_embeddings.npy' # Update this path if needed
21
 
 
22
  metadata = pd.read_csv(metadata_path)
23
  embeddings = np.load(embeddings_path)
24
 
25
- # Load the SentenceTransformer model
26
- model = SentenceTransformer("all-MiniLM-L6-v2").to(device)
27
-
28
- # Load prompts from files
29
- with open("technical_interviewer_prompt.txt", "r") as file:
30
- technical_interviewer_prompt = file.read()
31
-
32
- with open("question_generation_prompt.txt", "r") as file:
33
- question_generation_prompt = file.read()
34
-
35
  st.title("Real-World Programming Question Mock Interview")
36
 
37
- # Initialize session state variables
38
- if "messages" not in st.session_state:
39
- st.session_state.messages = []
40
-
41
- if "follow_up_mode" not in st.session_state:
42
- st.session_state.follow_up_mode = False # Tracks whether we're in follow-up mode
43
-
44
- if "generated_question" not in st.session_state:
45
- st.session_state.generated_question = None # Stores the generated question for persistence
46
-
47
- if "code_template" not in st.session_state:
48
- st.session_state.code_template = "" # Stores the code template
49
-
50
- if "sample_test_case" not in st.session_state:
51
- st.session_state.sample_test_case = "" # Stores the sample test case
52
-
53
- if "expected_output" not in st.session_state:
54
- st.session_state.expected_output = "" # Stores the expected output
55
-
56
- if "debug_logs" not in st.session_state:
57
- st.session_state.debug_logs = None # Stores debug logs for toggling
58
-
59
- # Function to find the top 1 most similar question based on user input
60
- def find_top_question(query):
61
- # Generate embedding for the query
62
- query_embedding = model.encode(query, convert_to_tensor=True, device=device).cpu().numpy()
63
-
64
- # Reshape query_embedding to ensure it is a 2D array
65
- query_embedding = query_embedding.reshape(1, -1) # Reshape to (1, n_features)
66
-
67
- # Compute cosine similarity between query embedding and dataset embeddings
68
- similarities = cosine_similarity(query_embedding, embeddings).flatten() # Flatten to get a 1D array of similarities
69
-
70
- # Get the index of the most similar result (top 1)
71
- top_index = similarities.argsort()[-1] # Index of highest similarity
72
-
73
- # Retrieve metadata for the top result
74
- top_result = metadata.iloc[top_index].copy()
75
- top_result['similarity_score'] = similarities[top_index]
76
-
77
- return top_result
78
-
79
- # Function to generate response using OpenAI API with debugging logs
80
- def generate_response(messages):
81
- # For debug logs, store only the follow-up conversation history
82
- st.session_state.debug_logs = st.session_state.messages # Update debug logs with current conversation
83
-
84
- response = client.chat.completions.create(
85
- model="o1-mini",
86
- messages=messages,
87
- )
88
-
89
- return response.choices[0].message.content
90
-
91
- # Function to extract code template and sample test case from the generated question
92
- def extract_code_and_test_case(generated_question):
93
- code_template = ""
94
- sample_test_case = ""
95
- expected_output = ""
96
-
97
- # Extract code template
98
- code_match = re.search(r'```python(.*?)```', generated_question, re.DOTALL)
99
- if code_match:
100
- code_template = code_match.group(1).strip()
101
- else:
102
- # Default code template if none is found
103
- code_template = "# Write your code here\n"
104
-
105
- # Extract sample test case and expected output
106
- test_case_match = re.search(r'Sample Input:\s*(.*?)\n', generated_question, re.DOTALL)
107
- expected_output_match = re.search(r'Expected Output:\s*(.*?)\n', generated_question, re.DOTALL)
108
- if test_case_match and expected_output_match:
109
- sample_test_case = test_case_match.group(1).strip()
110
- expected_output = expected_output_match.group(1).strip()
111
- else:
112
- sample_test_case = ""
113
- expected_output = ""
114
-
115
- return code_template, sample_test_case, expected_output
116
-
117
- # Move the input form to the sidebar to make it always visible and more compact
118
  with st.sidebar.form(key="input_form"):
119
- st.markdown("## Generate a New Question")
120
- company = st.text_input("Company", value="Google") # Default value: Google
121
- difficulty = st.selectbox("Difficulty", ["Easy", "Medium", "Hard"], index=1) # Default: Medium
122
- topic = st.text_input("Topic", value="Binary Search") # Default: Binary Search
123
  generate_button = st.form_submit_button(label="Generate")
124
 
125
  if generate_button:
126
- # Clear session state and start fresh with follow-up mode disabled
127
- st.session_state.messages = []
128
- st.session_state.follow_up_mode = False
129
-
130
- # Create a query from user inputs and find the most relevant question
131
  query = f"{company} {difficulty} {topic}"
132
- top_question = find_top_question(query)
133
-
134
- # Prepare a detailed prompt for GPT using the top question's details
135
- detailed_prompt = (
136
- f"Transform this LeetCode question into a real-world interview scenario.\n\n"
137
- f"**Company**: {top_question['company']}\n"
138
- f"**Question Name**: {top_question['questionName']}\n"
139
- f"**Difficulty Level**: {top_question['difficulty level']}\n"
140
- f"**Tags**: {top_question['Tags']}\n"
141
- f"**Content**: {top_question['Content']}\n"
142
- f"\nPlease create a real-world interview question based on this information. "
143
- f"Include the following sections:\n\n"
144
- f"- Problem Description\n"
145
- f"- Code Template (in a Python code block)\n"
146
- f"- Sample Input and Expected Output (clearly separated)\n"
147
- )
148
-
149
- # Generate response using OpenAI API with detailed prompt and debugging logs
150
- response = generate_response([{"role": "user", "content": detailed_prompt}]) # Question generation prompt excluded here
151
-
152
- # Store generated question in session state for persistence in sidebar and follow-up conversation state
153
  st.session_state.generated_question = response
154
 
155
- # Extract code template and sample test case
156
- code_template, sample_test_case, expected_output = extract_code_and_test_case(response)
157
- st.session_state.code_template = code_template
158
- st.session_state.sample_test_case = sample_test_case
159
- st.session_state.expected_output = expected_output
160
-
161
- # Enable follow-up mode after generating the initial question
162
- st.session_state.follow_up_mode = True
163
-
164
- # Display chat messages from history on app rerun (for subsequent conversation)
165
- for message in st.session_state.messages:
166
- with st.chat_message(message["role"]):
167
- st.markdown(message["content"])
168
-
169
- # Chatbox for subsequent conversations with assistant (follow-up mode)
170
- if st.session_state.follow_up_mode:
171
- if user_input := st.chat_input("Continue your conversation or ask follow-up questions here:"):
172
- # Display user message in chat message container and add to session history
173
- with st.chat_message("user"):
174
- st.markdown(user_input)
175
-
176
- st.session_state.messages.append({"role": "user", "content": user_input})
177
-
178
- # Prepare messages to send to the assistant
179
- # Include the technical interviewer prompt and generated question, but do not display them
180
- # Add an instruction for the assistant to reply as a real-world interviewer would
181
- assistant_instruction = (
182
- "As a real-world interviewer, please reply to the candidate's follow-up questions "
183
- "specific to the generated interview question, to the point, and in a natural, human-sounding way."
184
- )
185
-
186
- messages_to_send = [
187
- {"role": "user", "content": technical_interviewer_prompt},
188
- {"role": "assistant", "content": st.session_state.generated_question},
189
- {"role": "user", "content": assistant_instruction}
190
- ] + st.session_state.messages
191
-
192
- assistant_response = generate_response(messages_to_send)
193
-
194
- with st.chat_message("assistant"):
195
- st.markdown(assistant_response)
196
-
197
- st.session_state.messages.append({"role": "assistant", "content": assistant_response})
198
-
199
- st.sidebar.markdown("---")
200
- st.sidebar.markdown("## Generated Question")
201
- if st.session_state.generated_question:
202
- st.sidebar.markdown(st.session_state.generated_question)
203
- else:
204
- st.sidebar.markdown("_No question generated yet._")
205
-
206
- st.sidebar.markdown("---")
207
  st.sidebar.markdown("## Python Code Interpreter")
208
-
209
- # Pre-fill code interpreter with code template after question generation
210
- if st.session_state.code_template:
211
- code_input = st.sidebar.text_area("Write your Python code here:", value=st.session_state.code_template, height=300)
212
- else:
213
- code_input = st.sidebar.text_area("Write your Python code here:", height=300)
214
-
215
  if st.sidebar.button("Run Code"):
216
- try:
217
- # Prepare the code for execution
218
- exec_globals = {}
219
- # Create a function wrapper to execute the user's code
220
- exec(f"def user_solution():\n{code_input}", exec_globals)
221
- user_solution = exec_globals.get('user_solution', None)
222
-
223
- # Prepare sample test case execution
224
- if st.session_state.sample_test_case:
225
- # Assume the sample test case is in the format of arguments to the function
226
- test_case = st.session_state.sample_test_case
227
- # Evaluate the test case safely
228
- test_args = eval(test_case)
229
- if not isinstance(test_args, tuple):
230
- test_args = (test_args,)
231
- # Capture the output
232
- returned_output = user_solution(*test_args)
233
- else:
234
- returned_output = user_solution()
235
-
236
- # Display the expected output and returned output
237
- st.sidebar.markdown("### Sample Test Case Result:")
238
- st.sidebar.markdown(f"**Sample Input:** {st.session_state.sample_test_case}")
239
- st.sidebar.markdown(f"**Expected Output:** {st.session_state.expected_output}")
240
- st.sidebar.markdown(f"**Your Output:** {returned_output}")
241
-
242
- # Compare outputs
243
- if str(returned_output) == st.session_state.expected_output:
244
- st.sidebar.success("Your output matches the expected output!")
245
- else:
246
- st.sidebar.error("Your output does not match the expected output.")
247
- except Exception as e:
248
- st.sidebar.error(f"Error: {e}")
249
 
250
- # Right sidebar toggleable debug logs and code interpreter section
251
- with st.expander("Debug Logs (Toggle On/Off)", expanded=False):
252
- if st.session_state.debug_logs:
253
- st.write(st.session_state.debug_logs)
 
import streamlit as st

import numpy as np
import pandas as pd

from code_executor import execute_code
from question_handler import find_top_question, generate_detailed_prompt
from utils.constants import embeddings_path, metadata_path
from utils.openai_client import generate_response

# Load the question metadata and the pre-computed embeddings once at startup.
# Bug fix: the refactored version used pd/np without importing pandas/numpy,
# so the app crashed with NameError on the first run.
metadata = pd.read_csv(metadata_path)
embeddings = np.load(embeddings_path)

st.title("Real-World Programming Question Mock Interview")

# Bug fix: initialise every session key that is read elsewhere
# (code_executor reads sample_test_case / expected_output) so the first
# rerun cannot fail with a missing-attribute error.
for _key, _default in (
    ("generated_question", None),
    ("sample_test_case", ""),
    ("expected_output", ""),
):
    if _key not in st.session_state:
        st.session_state[_key] = _default

# Sidebar form for generating questions.
with st.sidebar.form(key="input_form"):
    company = st.text_input("Company", value="Google")
    difficulty = st.selectbox("Difficulty", ["Easy", "Medium", "Hard"], index=1)
    topic = st.text_input("Topic", value="Binary Search")
    generate_button = st.form_submit_button(label="Generate")

if generate_button:
    # Build a free-text query, retrieve the closest dataset question, and
    # ask the LLM to turn it into a real-world interview scenario.
    query = f"{company} {difficulty} {topic}"
    top_question = find_top_question(query, metadata, embeddings)
    detailed_prompt = generate_detailed_prompt(top_question)
    response = generate_response(detailed_prompt)
    st.session_state.generated_question = response

# Code execution section in the sidebar.
st.sidebar.markdown("## Python Code Interpreter")
code_input = st.sidebar.text_area("Write your Python code here:", height=300)
if st.sidebar.button("Run Code"):
    execute_code(code_input)

# Display generated questions and follow-up chat logic here...
code_executor.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import textwrap

import streamlit as st


def execute_code(code_input):
    """Execute user-submitted Python code and compare its result against the
    expected output stored in Streamlit session state.

    SECURITY: exec()/eval() below run arbitrary user code in-process with the
    app's privileges. That is the point of an interactive code-interpreter
    feature, but it must never be exposed to untrusted multi-tenant use.
    """
    try:
        exec_globals = {}
        # Bug fix: the user's code must be indented to form a valid body for
        # the wrapper function; the original f-string pasted it at column 0,
        # which always raised IndentationError.
        body = textwrap.indent(code_input.strip("\n") or "pass", "    ")
        # Bug fix: accept *args so the sample-test-case branch below can call
        # the wrapper with arguments; the original zero-arg wrapper raised
        # TypeError whenever a sample test case existed.
        exec(f"def user_solution(*args):\n{body}", exec_globals)
        user_solution = exec_globals.get('user_solution')
        if user_solution is None:
            st.sidebar.error("Could not build an executable solution from your code.")
            return

        sample_test_case = st.session_state.get("sample_test_case", "")
        if sample_test_case:
            # eval() of the stored test-case string — same trust caveat as above.
            test_args = eval(sample_test_case)
            if not isinstance(test_args, tuple):
                test_args = (test_args,)
            returned_output = user_solution(*test_args)
        else:
            returned_output = user_solution()

        st.sidebar.markdown(f"**Your Output:** {returned_output}")
        if str(returned_output) == st.session_state.get("expected_output", ""):
            st.sidebar.success("Your output matches the expected output!")
        else:
            st.sidebar.error("Your output does not match the expected output.")
    except Exception as e:
        # Surface any user-code failure in the sidebar instead of crashing the app.
        st.sidebar.error(f"Error: {e}")
elevenLabs.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import os

from elevenlabs import ElevenLabs

# Bug fix / security: read the API key from the environment instead of
# committing a hard-coded placeholder (or a real key) to the repository.
# KeyError here gives an immediate, clear failure when the key is missing.
client = ElevenLabs(api_key=os.environ["ELEVENLABS_API_KEY"])

# Generate audio from text.
audio = client.generate(
    text="Welcome to your mock interview. Let's start with your first question.",
    voice="Rachel",  # a pre-existing voice; a custom voice name also works
    model="eleven_monolingual_v1",  # English-optimized model
)

# Save the generated audio to a file.
# NOTE(review): depending on SDK version, generate() may return an iterator
# of byte chunks rather than bytes — confirm; if so, join chunks before writing.
with open("output.mp3", "wb") as f:
    f.write(audio)
embeddings_utils.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
from sentence_transformers import SentenceTransformer

# Name of the shared sentence-embedding model used across the app.
_MODEL_NAME = "all-MiniLM-L6-v2"


def load_model(device="cpu"):
    """Instantiate the SentenceTransformer encoder and move it to *device*."""
    encoder = SentenceTransformer(_MODEL_NAME)
    return encoder.to(device)


def compute_query_embedding(model, query):
    """Encode *query* with *model* and return the embedding as a CPU numpy array."""
    tensor = model.encode(query, convert_to_tensor=True)
    return tensor.cpu().numpy()
prompts.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
from pathlib import Path

# Prompt texts are loaded once at import time; callers import the two
# module-level strings below.
# Bug fix: read with an explicit UTF-8 encoding so the prompts do not
# depend on the platform's default locale encoding.
technical_interviewer_prompt = Path("technical_interviewer_prompt.txt").read_text(encoding="utf-8")
question_generation_prompt = Path("question_generation_prompt.txt").read_text(encoding="utf-8")
question_handler.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ from sklearn.metrics.pairwise import cosine_similarity
3
+
def find_top_question(query, metadata, embeddings, model=None):
    """Return the metadata row of the dataset question most similar to *query*.

    Parameters:
        query: free-text search string (e.g. built from company/difficulty/topic).
        metadata: pandas DataFrame with one row per question.
        embeddings: 2-D array of question embeddings, row-aligned with *metadata*.
        model: optional sentence encoder exposing ``encode``; when omitted, the
            shared model is loaded lazily via ``embeddings_utils.load_model``.

    Returns:
        A copy of the best-matching row with an added ``similarity_score`` field.

    Bug fix: the original referenced a global ``model`` that is never defined
    or imported in this module, so every call raised NameError. The encoder is
    now an explicit, optional (backward-compatible) parameter.
    """
    import numpy as np  # local import: this module does not import numpy at top level

    if model is None:
        from embeddings_utils import load_model
        model = load_model()

    query_embedding = model.encode(query, convert_to_tensor=True).cpu().numpy().reshape(1, -1)

    # Cosine similarity computed directly with numpy; matches sklearn's
    # cosine_similarity for these inputs, with all-zero rows mapped to 0.
    matrix = np.asarray(embeddings, dtype=float)
    q = query_embedding.ravel()
    denom = np.linalg.norm(matrix, axis=1) * np.linalg.norm(q)
    with np.errstate(divide="ignore", invalid="ignore"):
        similarities = np.where(denom > 0, matrix @ q / denom, 0.0)

    # argsort()[-1] (not argmax) preserves the original tie-breaking:
    # among equal scores the highest index wins.
    top_index = similarities.argsort()[-1]
    top_result = metadata.iloc[top_index].copy()
    top_result['similarity_score'] = similarities[top_index]
    return top_result
def generate_detailed_prompt(question_metadata):
    """Build the LLM prompt that turns a dataset question into a real-world
    interview scenario.

    *question_metadata* must expose the keys ``company``, ``questionName``,
    ``difficulty level``, ``Tags`` and ``Content`` (a pandas Series or dict).
    """
    # Render each metadata field as a bold-labelled markdown line.
    fields = [
        ("Company", question_metadata['company']),
        ("Question Name", question_metadata['questionName']),
        ("Difficulty Level", question_metadata['difficulty level']),
        ("Tags", question_metadata['Tags']),
        ("Content", question_metadata['Content']),
    ]
    field_lines = "".join(f"**{label}**: {value}\n" for label, value in fields)
    return (
        "Transform this LeetCode question into a real-world interview scenario.\n\n"
        + field_lines
        + "\nPlease create a real-world interview question based on this information. "
          "Include sections for problem description, code template, sample input, and expected output."
    )
requirements.txt CHANGED
@@ -5,4 +5,5 @@ numpy
5
  pandas
6
  sentence_transformers
7
  scikit-learn
8
- requests
 
 
5
  pandas
6
  sentence_transformers
7
  scikit-learn
8
+ requests
9
+ elevenlabs
utils/constants.py ADDED
@@ -0,0 +1,2 @@
 
 
 
# Central path constants shared across the app.

# CSV of per-question metadata (company, name, difficulty level, tags, content).
metadata_path = 'question_metadata.csv'
# Pre-computed sentence-embedding matrix, row-aligned with the CSV above.
embeddings_path = 'question_dataset_embeddings.npy'
utils/openai_client.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
import os

from openai import OpenAI


def generate_response(prompt):
    """Send *prompt* as a single user message to the chat model and return
    the assistant's reply text.

    Bug fix: ``os`` was used without being imported, so every call raised
    NameError before reaching the API.
    """
    # A fresh client per call is cheap and keeps this module stateless;
    # the key comes from the OPENAI_API_KEY environment variable.
    client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}],
    )
    return response.choices[0].message.content