import csv
from openai import OpenAI
import gradio as gr
import os
import random
import pandas as pd
from datetime import datetime
from cryptography.fernet import Fernet
from huggingface_hub import Repository
#from huggingface_hub import HfApi, snapshot_download
from datasets import load_dataset
from hashlib import blake2b

MODEL = 'gemini-3-flash-preview'

# OpenAI-compatible client pointed at the Gemini endpoint; the API key is read
# from the 'gemini' environment variable.
client = OpenAI(
    api_key=os.getenv('gemini'),
    base_url="https://generativelanguage.googleapis.com/v1beta/openai/"
)
crypto_key = os.getenv('crypto_key')
leak_key = os.getenv('leak_key')

# Dataset code follows example at:
# https://huggingface.co/spaces/julien-c/persistent-data/blob/main/app.py
DATASET_REPO_URL = "https://huggingface.co/datasets/klgold/tutor_data"
DATA_FILENAME = "data.csv"
DATA_FILE = os.path.join("data", DATA_FILENAME)
HF_TOKEN = os.environ.get("HF_TOKEN")
PROFILES_URL = "https://huggingface.co/datasets/klgold/tutor_profiles"

# Original prefix - no longer used
# PROMPT_PREFIX="For this query, answer with a single question that you haven't asked before that is meant to lead someone in the right direction, without directly answering the relevant homework question - unless the problem is solved completely, in which case, quit."

# Local clone of the transcript dataset; store_transcript appends to DATA_FILE
# inside this clone and pushes it back to the hub.
repo = Repository(
    local_dir="data", clone_from=DATASET_REPO_URL, use_auth_token=HF_TOKEN
)

name = None


def store_transcript(hw_num: int, is_ai: bool, transcript: str, state):
    """Append one transcript row to the data CSV and push it to the hub.

    Args:
        hw_num: Homework number the message belongs to.
        is_ai: True when the text was produced by the model, False when it
            came from the student.
        transcript: The message text to record.
        state: Object exposing ``username`` and ``session`` attributes; they
            are combined with ``hw_num`` into the row's session key.
    """
    # newline='' lets the csv module control line endings itself (per the csv
    # docs); without it, platforms that translate '\n' get blank rows.
    with open(DATA_FILE, 'a', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(
            csvfile, fieldnames=['session', 'is_ai', 'transcript', 'time']
        )
        writer.writerow({
            # f-string so a non-str username cannot raise TypeError mid-write.
            'session': f"{state.username}-{hw_num}-{state.session}",
            'is_ai': str(is_ai),
            'transcript': transcript,
            'time': str(datetime.now()),
        })
    repo.push_to_hub()
    # Alternative upload path, kept for reference:
    # api.upload_folder(
    #     folder_path="data",
    #     repo_id=DATASET_REPO_ID,
    #     repo_type="dataset"
    # )


# "Stop flag" stuff is commented out because the extra query
# to check whether answers are leaked doubles the response time,
# which currently means going from 10s to 20s.
class SessionState:
    """Per-user conversation state carried between button clicks."""

    def __init__(self):
        self.session = 0           # random session id, assigned on first query
        self.hw = 0                # homework number the messages refer to
        self.messages = []         # chat history sent to the model
        self.username = "unknown"  # overwritten from the request on each query
        self.stop_flag = 0         # only referenced by disabled stop-flag code


# Instruction text prepended to every student query. Its length is used by
# to_transcript to strip it back off, and its whitespace (including the two
# embedded newlines) is part of the prompt sent to the model.
TUTOR_PROMPT = (
    " You are about to be given a question about one of these homework "
    "problems from a student. "
    "You can assume the student is just beginning to learn Python. "
    "Your answer should NOT use any complete line of code from the problem "
    "solution. "
    "The student will get in trouble if they use any code you supply. "
    "Instead, you should either ask a question that helps you determine what "
    "the student knows, or ask a question that you think the student should "
    "be able to answer that will send them in the right direction, or offer "
    "to explain the relevant concepts for the problem. \n"
    "Every thing you say should be either a question that reveals what the "
    "student knows, or a question that is one step on the way to solving the "
    "problem, or an explanation of a concept or how to do something. "
    "If you are asked whether a solution is correct, refuse to confirm it, "
    "and instead ask what is still bothering the student about the code. "
    "If you are asked to find bugs in the code, you can indicate which line "
    "has the bug, but ask the student a question that prompts them to solve "
    "the bug on their own. "
    "Ask no more than one question in your reply. "
    "Don't be too aggressive in hinting - try to get the student to solve as "
    "much of the original problem as possible. "
    "Example if problem is \"Write a loop that will print 'Hello' three times.\": "
    "Human: I'm having trouble with this problem about for loops. "
    "AI: Are you familiar with how to use a for loop to iterate over lists? "
    "Human: Yes, I think I can do that. "
    "AI: What about using a for loop to do something a set number of times? "
    "Human: I don't know how to do that. "
    "AI: You can say for i in range(n) to do something n times. "
    "For example, for i in range(5) will iterate 5 times, where i is 0, "
    "then 1, then 2, then 3, then 4. "
    "Human : So can I write for i in range(3) print('Hello') "
    "AI: Almost there. Take a closer look at your first line. "
    "Do you remember what punctuation you need in a for loop? "
    "Human: So is it for i in range(3): print('Hello') "
    "AI: I can't confirm whether answers are correct. Try testing your code. "
    "Once you do, let me know if there's anything you're still worried about. "
    'Example if the problem is "Write a recursive function that will sum the numbers from m to n": '
    "Human: My solution is def my_sum(m, n) return m + my_sum(m+1,n) Is that right? "
    "AI: What happens when you run that code? "
    "Human: It just sits there "
    "AI: What do you think is happening when it just sits there? "
    "Human: I don't know "
    "AI: Walk through your code line by line for def my_sum(3,3). What happens? "
    "Human: Oh, I guess it gets called for 4,3 "
    "AI: And then what? \n"
    "Human: And then 5,3, and 6,3, and it never stops "
    "AI: So what did you forget? "
    "Human: How about this def my_sum(m, n) if m == n: return m return m + my_sum(m+1,n) "
    "AI: I can't confirm whether answers are correct, but consider trying it "
    "with some test values and then let me know if you have further concerns. "
    "And now, here is the student's actual query: "
)


def continue_session(hw_num, user_input, state, request: gr.Request):
    """Send the student's query to the model and stream the reply to the UI.

    This is a generator used as a Gradio event handler. While the model is
    streaming it yields the partial reply; the final yield replaces the
    response box with the whole conversation transcript.

    Args:
        hw_num: Selected homework number from the radio (a digit string), or
            None when nothing has been selected yet.
        user_input: The student's question.
        state: The SessionState for this user.
        request: Gradio request; its ``username`` is stored on the state.

    Yields:
        Dicts mapping ``response_box`` and ``state_var`` to their new values.
    """
    if hw_num is None:
        # No radio option chosen: int(None) would raise a bare TypeError.
        yield {
            response_box: 'Please select a homework number before asking a question.',
            state_var: state,
        }
        return

    hw_num_int = int(hw_num)
    if not state.messages or hw_num_int != state.hw:
        state.hw = hw_num_int
        solution_path = 'YourName_DS110_S26_HW' + str(hw_num_int) + '_sol.ipynb.encrypt'
        with open(solution_path, 'rb') as relevant_homework_file:
            encrypted = relevant_homework_file.read()
        # decrypt() returns bytes; decode them so the prompt holds the real
        # text rather than the "b'...'" repr with escaped newlines.
        homework_text = Fernet(crypto_key).decrypt(encrypted).decode(
            'utf-8', errors='replace'
        )
        if not state.messages:
            # Keep same session for change of HW
            state.session = random.randint(1, 999999)
        # Messages reset on change of HW - avoid really long messages
        state.messages = [
            {"role": "system", "content": "You are a helpful teaching assistant in a data science course. Your primary goal is to help the students learn."},
            {"role": "system", "content": "This is the homework the student is talking about -- do not reveal any code from it: " + homework_text},
        ]
        # profile_dataset = load_dataset("csv", data_files="https://huggingface.co/datasets/klgold/tutor_profiles/raw/main/S24hashed.csv", token=HF_TOKEN)
        # profile_df = pd.DataFrame(profile_dataset['train'])
        # profile_df = profile_df.set_index('user')

    state.username = request.username
    # Disabled stop-flag lookup, kept for reference:
    # if int(profile_df.loc[username, 'exp0']) == 0:
    #     state.stop_flag = 0

    prefix_length = len(TUTOR_PROMPT)
    content = TUTOR_PROMPT + user_input
    store_transcript(hw_num_int, False, user_input, state)
    state.messages.append({"role": "user", "content": content})

    last_message = ""
    try:
        response = client.chat.completions.create(
            model=MODEL,
            messages=state.messages,
            stream=True
        )
        for chunk in response:
            if not chunk.choices:
                # A chunk with no choices has no text; indexing it would raise
                # IndexError and discard the reply streamed so far.
                continue
            streamed = chunk.choices[0].delta.content
            if streamed is not None:
                last_message += streamed
                # The yielded dict is what updates the UI; the shared
                # component object is deliberately left untouched.
                yield {response_box: last_message, state_var: state}
    except Exception as e:
        # Best effort: show the error text to the user in place of the reply.
        last_message = str(e)

    state.messages.append({'role': 'assistant', 'content': last_message})
    transcript = to_transcript(state.messages, prefix_length)
    store_transcript(hw_num_int, True, last_message, state)
    # DEBUG Replace transcript with state.messages for debugging
    yield {response_box: transcript, state_var: state}


def to_transcript(messages, prefix_length):
    """Render the chat history as 'User:' / 'AI:' lines.

    Args:
        messages: List of dicts with 'role' and 'content' keys.
        prefix_length: Number of leading characters to drop from each user
            message (the instruction text prepended before the query).

    Returns:
        The transcript string; every entry starts with a newline. Messages
        with any other role are skipped.
    """
    entries = []
    for message in messages:
        if message['role'] == 'user':
            # Need to remove the extra prompt verbiage
            entries.append('\nUser: ' + message['content'][prefix_length:])
        elif message['role'] == 'assistant':
            entries.append('\nAI: ' + message['content'])
    return ''.join(entries)


def my_hash(text):
    """Return the hex BLAKE2b digest of ``text`` encoded as UTF-8."""
    return blake2b(text.encode('utf-8')).hexdigest()


def authenticate(username, password):
    """Check a login against the hashed-password profile dataset.

    Args:
        username: Login name, looked up in the dataset's 'Username' column.
        password: Plain-text password; hashed with my_hash before comparing.

    Returns:
        True when the username exists and its stored 'hash' value equals the
        hash of ``password``; False otherwise.
    """
    import hmac  # local import: only this function needs it

    profile_dataset = load_dataset(
        "klgold/tutor_profiles",
        data_files={"train": "S26hashed.csv"},
        token=HF_TOKEN,
    )
    profile_df = pd.DataFrame(profile_dataset['train']).set_index('Username')
    if username not in profile_df.index:
        return False
    stored_hash = str(profile_df.loc[username, 'hash'])
    candidate_hash = str(my_hash(password))
    # Constant-time comparison; bytes are used so non-ASCII stored values
    # cannot raise TypeError.
    return hmac.compare_digest(
        stored_hash.encode('utf-8'), candidate_hash.encode('utf-8')
    )


with gr.Blocks() as demo:
    hw_num_var = gr.State(0)
    state_var = gr.State(SessionState())
    transcript_var = gr.State('')
    transcript_fn_var = gr.State('')
    gr.Markdown('Which homework is this about?')
    hw_num_radio = gr.Radio(['1','2','3','4','5','6','7','8','9'], label="HWnum")
    gr.Markdown('Sample questions: How should I approach 2c?, When should I use a tuple instead of a list?, Do I have to use datetime objects to do 3b?')
    user_input = gr.Textbox(label='Input')
    response_button = gr.Button('Go')
    response_box = gr.Textbox(lines=20, label='Response')
    response_button.click(
        fn=continue_session,
        inputs=[hw_num_radio, user_input, state_var],
        outputs=[response_box, state_var],
    )

demo.launch(auth=authenticate, server_name="0.0.0.0", show_error=True)