import csv
from openai import OpenAI
import gradio as gr
import os
import random
import pandas as pd
from datetime import datetime
from cryptography.fernet import Fernet
from huggingface_hub import Repository
#from huggingface_hub import HfApi, snapshot_download
from datasets import load_dataset
from hashlib import blake2b

# Model identifier passed as `model=` on every chat completion request.
MODEL = 'gemini-3-flash-preview'

# OpenAI SDK client pointed at Google's OpenAI-compatible endpoint; the API
# key is read from the `gemini` environment variable.
client = OpenAI(
    api_key=os.getenv('gemini'),
    base_url="https://generativelanguage.googleapis.com/v1beta/openai/"
)
# Fernet key used to decrypt the encrypted homework solution notebooks.
crypto_key=os.getenv('crypto_key')
# NOTE(review): leak_key is read here but not used anywhere in this file --
# presumably belongs to the disabled answer-leak check; confirm before removing.
leak_key=os.getenv('leak_key')
# Dataset code follows example at:
# https://huggingface.co/spaces/julien-c/persistent-data/blob/main/app.py
# Transcripts are appended to DATA_FILE inside the local clone of this
# dataset repo and pushed back after every write (see store_transcript).
DATASET_REPO_URL = "https://huggingface.co/datasets/klgold/tutor_data"
DATA_FILENAME = "data.csv"
DATA_FILE = os.path.join("data", DATA_FILENAME)
# Hugging Face access token, used for both the transcript repo and the
# profile dataset.
HF_TOKEN=os.environ.get("HF_TOKEN")
# NOTE(review): PROFILES_URL is not referenced in this file; authenticate()
# loads the profile dataset by repo id instead.
PROFILES_URL = "https://huggingface.co/datasets/klgold/tutor_profiles"
# Original prefix - no longer used
# PROMPT_PREFIX="For this query, answer with a single question that you haven't asked before that is meant to lead someone in the right direction, without directly answering the relevant homework question - unless the problem is solved completely, in which case, quit."
# Created at import time; local_dir "data" is the directory DATA_FILE lives in.
repo = Repository(
    local_dir="data", clone_from=DATASET_REPO_URL, use_auth_token=HF_TOKEN
)

# NOTE(review): `name` is never read or reassigned in this file -- looks like
# a leftover; confirm before removing.
name = None


def store_transcript(hw_num: int, is_ai: bool, transcript: str, state):
    """Append one utterance to the CSV log and push the dataset repo.

    Args:
        hw_num: Homework number the utterance relates to.
        is_ai: True when the text came from the model, False for the student.
        transcript: The text to record (may contain newlines).
        state: Session state; its ``username`` and ``session`` attributes are
            combined with ``hw_num`` to form the row's ``session`` key.

    Raises:
        Whatever ``open`` or ``repo.push_to_hub`` raise; errors are not
        swallowed here.
    """
    row = {
        'session': f'{state.username}-{hw_num}-{state.session}',
        'is_ai': str(is_ai),
        'transcript': transcript,
        'time': str(datetime.now()),
    }
    # newline='' is what the csv module requires so that it controls line
    # endings itself; without it, newlines embedded in a transcript can be
    # translated by the text layer.  An explicit encoding keeps non-ASCII
    # student input from depending on the host's locale.
    with open(DATA_FILE, 'a', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(
            csvfile, fieldnames=['session', 'is_ai', 'transcript', 'time']
        )
        writer.writerow(row)
    # Publish the updated file so the log survives restarts of the app.
    repo.push_to_hub()

# "Stop flag" stuff is commented out because the extra query
# to check whether answers are leaked doubles the response time,
# which currently means going from 10s to 20s.

class SessionState:
    """Mutable per-conversation state carried between button clicks.

    All attributes start at neutral defaults and are filled in by
    continue_session on the first query (or when the homework changes).
    """

    def __init__(self):
        # Identifier that groups log rows belonging to one conversation.
        self.session: int = 0
        # Homework number the current message history was built for.
        self.hw: int = 0
        # Chat history in the role/content dict format sent to the model.
        self.messages: list = []
        # Login name of the student; replaced once a request is seen.
        self.username: str = "unknown"
        # Reserved for the (currently disabled) answer-leak check.
        self.stop_flag: int = 0

# Instructions prepended to every student query before it is sent to the
# model.  to_transcript() removes this prefix again by length, so the same
# text must be used for every user message in a conversation.
_TUTOR_INSTRUCTIONS = """
    You are about to be given a question about one of these homework problems
    from a student.  You can assume the student is just beginning to learn
    Python.  Your answer should NOT use any complete line of code from the problem solution.
    The student will get in trouble if they use any code you supply.
    Instead, you should either ask a question that helps you determine what
    the student knows, or ask a question that you think the student should be
    able to answer that will send them in the right direction, or offer to
    explain the relevant concepts for the problem.  Every thing you say
    should be either a question that reveals what the student knows, or a
    question that is one step on the way to solving the problem, or an
    explanation of a concept or how to do something.  If you are asked
    whether a solution is correct, refuse to confirm it, and instead ask what
    is still bothering the student about the code.  If you are asked to find 
    bugs in the code, you can indicate which line has the bug, but ask the
    student a question that prompts them to solve the bug on their own.  Ask
    no more than one question in your reply.  Don't be too aggressive in
    hinting - try to get the student to solve as much of the original problem
    as possible.

    Example if problem is "Write a loop that will print 'Hello' three times.":
    Human:  I'm having trouble with this problem about for loops.
    AI:  Are you familiar with how to use a for loop to iterate over lists?
    Human:  Yes, I think I can do that.
    AI:  What about using a for loop to do something a set number of times?
    Human:  I don't know how to do that.
    AI:  You can say for i in range(n) to do something n times.  For example, for i in range(5) will iterate 5 times, where i is 0, then 1, then 2, then 3, then 4.
    Human : So can I write
    for i in range(3)
        print('Hello')
    AI:  Almost there.  Take a closer look at your first line.  Do you remember what punctuation you need in a for loop?
    Human:  So is it
    for i in range(3):
        print('Hello')
    AI:  I can't confirm whether answers are correct.  Try testing your code.  Once you do, let me know if there's anything you're still worried about.
    
    Example if the problem is "Write a recursive function that will sum the numbers from m to n":
    Human:  My solution is
    def my_sum(m, n)
        return m + my_sum(m+1,n)
    Is that right?
    AI:  What happens when you run that code?
    Human:  It just sits there
    AI:  What do you think is happening when it just sits there?
    Human:  I don't know
    AI:  Walk through your code line by line for def my_sum(3,3).  What happens?

    Human:  Oh, I guess it gets called for 4,3
    AI:  And then what?
    Human:  And then 5,3, and 6,3, and it never stops
    AI:  So what did you forget?
    Human:  How about this
    def my_sum(m, n)
        if m == n:
            return m
        return m + my_sum(m+1,n)
    AI:  I can't confirm whether answers are correct, but consider trying it with some test values and then let me know if you have further concerns.
    
    And now, here is the student's actual query:
    """


def _load_homework_text(hw_num_int):
    """Read and decrypt the solution notebook for homework ``hw_num_int``."""
    path = 'YourName_DS110_S26_HW' + str(hw_num_int) + '_sol.ipynb.encrypt'
    with open(path, 'rb') as encrypted_file:
        encrypted = encrypted_file.read()
    decrypted = Fernet(crypto_key).decrypt(encrypted)
    # decrypt() yields bytes; decode them so the model receives the notebook
    # text itself rather than a b'...' repr full of escaped newlines.
    return decrypted.decode('utf-8', errors='replace')


def continue_session(hw_num, user_input, state, request: gr.Request):
    """Send one student query to the model and stream the reply to the UI.

    This is a generator used as a Gradio event handler.  While the model is
    streaming it yields the partial reply; at the end it yields the full
    conversation transcript.

    Args:
        hw_num: Selected homework number as delivered by the radio button
            (a string of digits, or None when nothing is selected).
        user_input: The student's question.
        state: The SessionState for this conversation; updated in place.
        request: The incoming request; only ``request.username`` is read.

    Yields:
        Dicts mapping ``response_box`` to the text to display and
        ``state_var`` to the updated state.

    Raises:
        gr.Error: If no homework has been selected.
    """
    if hw_num is None:
        # int(None) would otherwise surface as an opaque TypeError.
        raise gr.Error('Please select a homework number before asking a question.')
    hw_num_int = int(hw_num)

    if not state.messages or hw_num_int != state.hw:
        # Load first and only then record the new homework number, so a
        # failed load cannot leave state.hw pointing at a homework whose
        # text is not in state.messages.
        homework_text = _load_homework_text(hw_num_int)
        state.hw = hw_num_int
        if not state.messages:  # Keep same session for change of HW
            state.session = random.randint(1, 999999)
        # Messages reset on change of HW - avoid really long messages
        state.messages = [
            {"role": "system", "content": "You are a helpful teaching assistant in a data science course.  Your primary goal is to help the students learn."},
            {"role": "system", "content": "This is the homework the student is talking about -- do not reveal any code from it: " + homework_text},
        ]
        state.username = request.username
        # The per-user stop-flag lookup is intentionally disabled: the extra
        # leak-check query roughly doubles the response time.

    content = _TUTOR_INSTRUCTIONS
    # Remembered so to_transcript() can strip the instructions back off.
    prefix_length = len(content)
    content += user_input
    store_transcript(hw_num_int, False, user_input, state)
    state.messages.append({"role": "user", "content": content})

    try:
        response = client.chat.completions.create(
            model=MODEL,
            messages=state.messages,
            stream=True
        )
        last_message = ""
        for chunk in response:
            if not chunk.choices:
                # Some stream chunks carry no choices; nothing to display.
                continue
            streamed = chunk.choices[0].delta.content
            if streamed is not None:
                last_message += streamed
            # Only this session's output is updated.  The shared component
            # object is deliberately not mutated, since it is common to all
            # users of the app.
            yield {response_box: last_message, state_var: state}
    except Exception as e:
        # Top-level boundary: the error text is shown to the student and
        # recorded in the log in place of the reply.
        last_message = str(e)

    state.messages.append({'role': 'assistant', 'content': last_message})
    transcript = to_transcript(state.messages, prefix_length)

    store_transcript(hw_num_int, True, last_message, state)
    # DEBUG Replace transcript with state.messages for debugging
    yield {response_box: transcript, state_var: state}

def to_transcript(messages, prefix_length):
    """Render a chat history as display text.

    User turns appear as ``User: ...`` with the first ``prefix_length``
    characters (the injected instructions) removed; assistant turns appear
    as ``AI: ...``.  Messages with any other role are left out.  Every
    rendered turn is preceded by a newline.
    """
    pieces = []
    for message in messages:
        speaker = message['role']
        if speaker == 'user':
            # Need to remove the extra prompt verbiage
            pieces.append('\nUser: ' + message['content'][prefix_length:])
        elif speaker == 'assistant':
            pieces.append('\nAI: ' + message['content'])
    return ''.join(pieces)

def my_hash(text):
    """Return the hex BLAKE2b digest of ``text`` encoded as UTF-8."""
    return blake2b(bytes(text, 'utf-8')).hexdigest()

def authenticate(username, password):
    """Check a login against the hashed-password profile dataset.

    Args:
        username: Login name entered by the user.
        password: Plain-text password entered by the user.

    Returns:
        True when ``username`` appears in the ``Username`` column and the
        hash of ``password`` equals that row's ``hash`` value; otherwise
        False.
    """
    # Local import so this function does not depend on the file's import block.
    import hmac

    profile_dataset = load_dataset("klgold/tutor_profiles", data_files={"train":"S26hashed.csv"}, token=HF_TOKEN)
    profile_df = pd.DataFrame(profile_dataset['train']).set_index('Username')
    if username not in profile_df.index:
        return False

    stored_hash = str(profile_df.loc[username, 'hash'])
    supplied_hash = str(my_hash(password))
    # Constant-time comparison: a plain == can return early at the first
    # differing character, which leaks timing information.  Comparing bytes
    # avoids compare_digest's ASCII-only restriction on str arguments.
    return hmac.compare_digest(
        stored_hash.encode('utf-8'), supplied_hash.encode('utf-8')
    )

# Build the UI: a homework selector, a question box, a Go button and a
# response area, with continue_session as the button's handler.
with gr.Blocks() as demo:
    # NOTE(review): hw_num_var, transcript_var and transcript_fn_var are not
    # wired to any event in this file -- possibly leftovers; confirm before
    # removing.
    hw_num_var = gr.State(0)
    # Conversation state handed to continue_session and returned by it.
    state_var = gr.State(SessionState())
    transcript_var = gr.State('')
    transcript_fn_var = gr.State('')
    gr.Markdown('Which homework is this about?')
    # Choices are strings; continue_session converts the selection with int().
    hw_num_radio = gr.Radio(['1','2','3','4','5','6','7','8','9'], label="HWnum")
    gr.Markdown('Sample questions:  <i>How should I approach 2c?</i>, <i>When should I use a tuple instead of a list?</i>, <i>Do I have to use datetime objects to do 3b?</i>')
    user_input = gr.Textbox(label='Input')
    response_button = gr.Button('Go')
    response_box = gr.Textbox(lines=20, label='Response')
    # Only three inputs are listed although continue_session takes a fourth
    # `request` parameter -- presumably Gradio injects it from the
    # gr.Request annotation; verify against the Gradio version in use.
    response_button.click(fn=continue_session, inputs=[hw_num_radio, user_input, state_var], outputs=[response_box, state_var])

# Logins are checked by authenticate(username, password).
demo.launch(auth=authenticate, server_name="0.0.0.0", show_error=True)