Spaces:
Running
Running
| import csv | |
| from openai import OpenAI | |
| import gradio as gr | |
| import os | |
| import random | |
| import pandas as pd | |
| from datetime import datetime | |
| from cryptography.fernet import Fernet | |
| from huggingface_hub import Repository | |
| #from huggingface_hub import HfApi, snapshot_download | |
| from datasets import load_dataset | |
| from hashlib import blake2b | |
# --- Model and API client ------------------------------------------------
# The model is reached through the OpenAI client pointed at Google's
# OpenAI-compatible endpoint.
MODEL = 'gemini-3-flash-preview'
client = OpenAI(
    base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
    api_key=os.environ.get('gemini'),
)

# --- Secrets supplied through environment variables ----------------------
crypto_key = os.environ.get('crypto_key')  # passed to Fernet when decrypting homework files
leak_key = os.environ.get('leak_key')      # not referenced in the visible code
HF_TOKEN = os.environ.get("HF_TOKEN")

# --- Persistent transcript storage ---------------------------------------
# Dataset code follows example at:
# https://huggingface.co/spaces/julien-c/persistent-data/blob/main/app.py
DATASET_REPO_URL = "https://huggingface.co/datasets/klgold/tutor_data"
PROFILES_URL = "https://huggingface.co/datasets/klgold/tutor_profiles"
DATA_FILENAME = "data.csv"
DATA_FILE = os.path.join("data", DATA_FILENAME)

# Original prefix - no longer used
# PROMPT_PREFIX="For this query, answer with a single question that you haven't asked before that is meant to lead someone in the right direction, without directly answering the relevant homework question - unless the problem is solved completely, in which case, quit."

# Clone the dataset repo into ./data at import time; transcripts are
# appended to DATA_FILE inside this clone and pushed back from there.
repo = Repository(
    clone_from=DATASET_REPO_URL,
    local_dir="data",
    use_auth_token=HF_TOKEN,
)

name = None
def store_transcript(hw_num: int, is_ai: bool, transcript: str, state):
    """Append one conversation turn to the CSV log and push it to the Hub.

    Args:
        hw_num: Homework number the turn belongs to.
        is_ai: True when ``transcript`` is the model's reply, False when it
            is the student's input.
        transcript: Text of the turn.
        state: Session state object; its ``username`` and ``session``
            attributes are combined with ``hw_num`` to form the session id.
    """
    row = {
        'session': f'{state.username}-{hw_num}-{state.session}',
        'is_ai': str(is_ai),
        'transcript': transcript,
        'time': str(datetime.now()),
    }
    # newline='' is what the csv module asks for so that embedded newlines
    # and row terminators are written untouched; an explicit encoding keeps
    # non-ASCII student text from depending on the host locale.
    with open(DATA_FILE, 'a', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(
            csvfile,
            fieldnames=['session', 'is_ai', 'transcript', 'time'],
        )
        writer.writerow(row)
    # Push only after the file is closed so the new row is flushed to disk.
    repo.push_to_hub()
    # Alternative upload path, currently disabled:
    #   api.upload_folder(
    #       folder_path="data",
    #       repo_id=DATASET_REPO_ID,
    #       repo_type="dataset"
    #   )
| # "Stop flag" stuff is commented out because the extra query | |
| # to check whether answers are leaked doubles the response time, | |
| # which currently means going from 10s to 20s. | |
class SessionState:
    """Mutable per-conversation state passed between UI callbacks.

    Attributes are plain defaults here; the callbacks fill them in.
    """

    def __init__(self):
        # Who is talking and which homework the conversation is about.
        self.username = "unknown"
        self.hw = 0
        # Numeric id of the conversation; 0 until one is assigned.
        self.session = 0
        # Chat history in the role/content dict format sent to the model.
        self.messages = []
        # Left over from the answer-leak check, which is commented out.
        self.stop_flag = 0
# Instructions prepended to every student query before it is sent to the
# model. The text is constant, so it is built once here; its length is also
# used to strip the instructions back out when rendering the transcript.
_TUTOR_INSTRUCTIONS = """
You are about to be given a question about one of these homework problems
from a student. You can assume the student is just beginning to learn
Python. Your answer should NOT use any complete line of code from the problem solution.
The student will get in trouble if they use any code you supply.
Instead, you should either ask a question that helps you determine what
the student knows, or ask a question that you think the student should be
able to answer that will send them in the right direction, or offer to
explain the relevant concepts for the problem. Every thing you say
should be either a question that reveals what the student knows, or a
question that is one step on the way to solving the problem, or an
explanation of a concept or how to do something. If you are asked
whether a solution is correct, refuse to confirm it, and instead ask what
is still bothering the student about the code. If you are asked to find
bugs in the code, you can indicate which line has the bug, but ask the
student a question that prompts them to solve the bug on their own. Ask
no more than one question in your reply. Don't be too aggressive in
hinting - try to get the student to solve as much of the original problem
as possible.
Example if problem is "Write a loop that will print 'Hello' three times.":
Human: I'm having trouble with this problem about for loops.
AI: Are you familiar with how to use a for loop to iterate over lists?
Human: Yes, I think I can do that.
AI: What about using a for loop to do something a set number of times?
Human: I don't know how to do that.
AI: You can say for i in range(n) to do something n times. For example, for i in range(5) will iterate 5 times, where i is 0, then 1, then 2, then 3, then 4.
Human : So can I write
for i in range(3)
print('Hello')
AI: Almost there. Take a closer look at your first line. Do you remember what punctuation you need in a for loop?
Human: So is it
for i in range(3):
print('Hello')
AI: I can't confirm whether answers are correct. Try testing your code. Once you do, let me know if there's anything you're still worried about.
Example if the problem is "Write a recursive function that will sum the numbers from m to n":
Human: My solution is
def my_sum(m, n)
return m + my_sum(m+1,n)
Is that right?
AI: What happens when you run that code?
Human: It just sits there
AI: What do you think is happening when it just sits there?
Human: I don't know
AI: Walk through your code line by line for def my_sum(3,3). What happens?
Human: Oh, I guess it gets called for 4,3
AI: And then what?
Human: And then 5,3, and 6,3, and it never stops
AI: So what did you forget?
Human: How about this
def my_sum(m, n)
if m == n:
return m
return m + my_sum(m+1,n)
AI: I can't confirm whether answers are correct, but consider trying it with some test values and then let me know if you have further concerns.
And now, here is the student's actual query:
"""


def _load_homework_text(hw_num_int):
    """Read and decrypt the encrypted solution notebook for one homework."""
    path = 'YourName_DS110_S26_HW' + str(hw_num_int) + '_sol.ipynb.encrypt'
    # Context manager closes the file even if read() raises.
    with open(path, 'rb') as homework_file:
        encrypted = homework_file.read()
    decrypted = Fernet(crypto_key).decrypt(encrypted)
    # decrypt() returns bytes. Decode them instead of calling str() on the
    # bytes object, which would put the b'...' repr (with escaped newlines)
    # into the prompt.
    return decrypted.decode('utf-8', errors='replace')


def continue_session(hw_num, user_input, state, request: gr.Request):
    """Handle one click of the "Go" button and stream the tutor's reply.

    Args:
        hw_num: Selected homework number from the radio group (a string
            such as ``'3'``), or None when nothing is selected.
        user_input: The student's question.
        state: The ``SessionState`` for this conversation; updated in place.
        request: Gradio request, used to read the logged-in username.

    Yields:
        Dicts mapping ``response_box`` and ``state_var`` to their new
        values: the partial reply while streaming, then the full
        conversation transcript once the reply is complete.
    """
    if hw_num is None:
        # Nothing selected in the radio group: int(None) would raise, so
        # tell the student what to do instead.
        yield {response_box: 'Please select a homework number first.',
               state_var: state}
        return
    hw_num_int = int(hw_num)

    # Keep the previous value if the request carries no username.
    state.username = request.username or state.username

    if not state.messages or hw_num_int != state.hw:
        state.hw = hw_num_int
        homework_text = _load_homework_text(hw_num_int)
        if not state.messages:  # Keep same session for change of HW
            state.session = random.randint(1, 999999)
        # Messages reset on change of HW - avoid really long messages
        state.messages = [
            {"role": "system", "content": "You are a helpful teaching assistant in a data science course. Your primary goal is to help the students learn."},
            {"role": "system", "content": "This is the homework the student is talking about -- do not reveal any code from it: " + homework_text},
        ]
        # Per-student profile lookup that used to drive state.stop_flag is
        # disabled:
        # profile_dataset = load_dataset("csv", data_files="https://huggingface.co/datasets/klgold/tutor_profiles/raw/main/S24hashed.csv", token=HF_TOKEN)
        # profile_df = pd.DataFrame(profile_dataset['train'])
        # profile_df = profile_df.set_index('user')
        # if int(profile_df.loc[username, 'exp0']) == 0:
        #     state.stop_flag = 0

    prefix_length = len(_TUTOR_INSTRUCTIONS)
    store_transcript(hw_num_int, False, user_input, state)
    state.messages.append(
        {"role": "user", "content": _TUTOR_INSTRUCTIONS + user_input}
    )

    try:
        response = client.chat.completions.create(
            model=MODEL,
            messages=state.messages,
            stream=True,
        )
        last_message = ""
        for chunk in response:
            # Skip chunks with no choices; indexing [0] on them would raise
            # and replace the reply with the error text.
            if not chunk.choices:
                continue
            streamed = chunk.choices[0].delta.content
            if streamed is not None:
                last_message += streamed
                # Send the partial reply through the yielded update only;
                # the component object itself is shared by all sessions and
                # is not mutated here.
                yield {response_box: last_message, state_var: state}
    except Exception as e:
        # UI boundary: show the failure text to the student rather than
        # crashing the callback. It is also logged as the reply below.
        last_message = str(e)

    state.messages.append({'role': 'assistant', 'content': last_message})
    transcript = to_transcript(state.messages, prefix_length)
    store_transcript(hw_num_int, True, last_message, state)
    # DEBUG Replace transcript with state.messages for debugging
    yield {response_box: transcript, state_var: state}
def to_transcript(messages, prefix_length):
    """Render a chat history as display text.

    User turns are shown with the first ``prefix_length`` characters of
    their content removed (the instruction text added before each query);
    assistant turns are shown in full. Messages with any other role are
    left out.

    Args:
        messages: List of dicts with ``'role'`` and ``'content'`` keys.
        prefix_length: Number of leading characters to drop from each user
            message.

    Returns:
        The concatenated transcript, one ``"\\nUser: "`` / ``"\\nAI: "``
        entry per turn; an empty string when there are no such turns.
    """
    speaker_labels = {'user': '\nUser: ', 'assistant': '\nAI: '}
    pieces = []
    for entry in messages:
        role = entry['role']
        if role not in speaker_labels:
            continue
        text = entry['content']
        if role == 'user':
            # Need to remove the extra prompt verbiage
            text = text[prefix_length:]
        pieces.append(speaker_labels[role] + text)
    return ''.join(pieces)
def my_hash(text):
    """Return the hexadecimal BLAKE2b digest of ``text`` encoded as UTF-8."""
    encoded = bytes(text, 'utf-8')
    return blake2b(encoded).hexdigest()
def authenticate(username, password):
    """Check a login against the hashed-password profile table.

    The profile CSV is loaded on every call, indexed by its ``Username``
    column, and the stored ``hash`` value is compared with the hash of the
    supplied password.

    Args:
        username: Login name entered by the user.
        password: Plain-text password entered by the user.

    Returns:
        True when the username exists and the hashes match, else False.
    """
    import hmac  # local import: only this function needs it

    profile_dataset = load_dataset(
        "klgold/tutor_profiles",
        data_files={"train": "S26hashed.csv"},
        token=HF_TOKEN,
    )
    profile_df = pd.DataFrame(profile_dataset['train']).set_index('Username')
    if username not in profile_df.index:
        return False
    stored_hash = str(profile_df.loc[username, 'hash'])
    supplied_hash = str(my_hash(password))
    # Constant-time comparison, so the check does not reveal how many
    # leading characters matched. Both sides are encoded to bytes because
    # compare_digest only accepts str values that are pure ASCII.
    return hmac.compare_digest(
        stored_hash.encode('utf-8'),
        supplied_hash.encode('utf-8'),
    )
# Build the page: homework selector, question box, "Go" button and a
# response area. Components are created in display order.
with gr.Blocks() as demo:
    # Per-session values. Only state_var is wired to a callback below; the
    # other three are not referenced elsewhere in the visible code.
    hw_num_var = gr.State(0)
    state_var = gr.State(SessionState())
    transcript_var = gr.State('')
    transcript_fn_var = gr.State('')

    gr.Markdown('Which homework is this about?')
    homework_choices = [str(number) for number in range(1, 10)]
    hw_num_radio = gr.Radio(homework_choices, label="HWnum")

    gr.Markdown('Sample questions: <i>How should I approach 2c?</i>, <i>When should I use a tuple instead of a list?</i>, <i>Do I have to use datetime objects to do 3b?</i>')
    user_input = gr.Textbox(label='Input')
    response_button = gr.Button('Go')
    response_box = gr.Textbox(lines=20, label='Response')

    # Clicking "Go" runs continue_session with the selected homework, the
    # question and the session state; it updates the response box and state.
    response_button.click(
        fn=continue_session,
        inputs=[hw_num_radio, user_input, state_var],
        outputs=[response_box, state_var],
    )

# Logins are checked by authenticate(); the server listens on all interfaces.
demo.launch(auth=authenticate, server_name="0.0.0.0", show_error=True)