Spaces:

wangfowen
/

hackaithon_app

Runtime error

File size: 7,501 Bytes

9bc85e2
471307c
9bc85e2
471307c
 
 
4b2c4ed
9bc85e2
471307c
86f6d54
9bc85e2
86f6d54
 
 
4b2c4ed
471307c
851ceb5
9bc85e2
4b2c4ed
9bc85e2
851ceb5
 
 
 
 
 
4b2c4ed
471307c
 
 
 
 
 
 
 
9bc85e2
471307c
 
 
 
 
9bc85e2
471307c
 
9bc85e2
471307c
 
9bc85e2
471307c
4b2c4ed
 
471307c
9bc85e2
851ceb5
471307c
 
 
 
 
 
 
 
 
 
4b2c4ed
 
 
 
 
 
86f6d54
471307c
 
6b2d71e
9bc85e2
4b2c4ed
 
9bc85e2
4b2c4ed
12b9fc6
 
 
 
 
 
 
 
 
 
 
 
471307c
 
12b9fc6
471307c
 
 
 
 
 
 
 
 
4b2c4ed
6b2d71e
 
 
 
 
 
 
471307c
 
 
4b2c4ed
6b2d71e
 
 
 
83af9fd
 
 
6b2d71e
4b2c4ed
83af9fd
4b2c4ed
 
6b2d71e
83af9fd
d13ddca
83af9fd
 
9a67629
1d76822
 
c48f703
6b2d71e
1d76822
c48f703
6b2d71e
70a1a09
 
471307c
83af9fd
1645218
70a1a09
83af9fd
851ceb5
1645218
851ceb5
 
 
471307c
 
86f6d54
1645218
 
83af9fd
1645218
 
83af9fd
1645218
 
12b9fc6
471307c
 
 
1645218
70a1a09
471307c
 
4b2c4ed
851ceb5
 
1645218
851ceb5
4b2c4ed
83af9fd
4b2c4ed
 
83af9fd
 
471307c
83af9fd
1d76822
83af9fd
9bc85e2
4b2c4ed
471307c

import streamlit as st
import requests
import openai
import pinecone
import json
import re
from tenacity import retry, stop_after_attempt, wait_exponential

# Credentials come from Streamlit Secrets (Pinecone first, then OpenAI)
PINECONE_API_KEY = st.secrets["PINECONE_API_KEY"]
OPENAI_API_KEY = st.secrets["OPENAI_API_KEY"]

# Chat completion settings used by query_openai
MODEL = "gpt-3.5-turbo"
MAX_TOKENS = 1024  # passed as max_tokens on every chat completion request

# Hand the key to the OpenAI client
openai.api_key = OPENAI_API_KEY

# Running chat transcript: query_openai sends this list as `messages`
# and appends each prompt and reply to it
conversation = []

@st.cache_resource
def load_pinecone_index():
    """
    Initialise the Pinecone client and return a handle to the workshop index.

    Returns:
        The pinecone.Index object for the "prequelworkshops" index.
    """
    pinecone.init(environment="us-central1-gcp", api_key=PINECONE_API_KEY)
    return pinecone.Index("prequelworkshops")

def get_embeddings(texts):
    """
    Turn a batch of texts into embedding vectors with OpenAI's
    "text-embedding-ada-002" model.

    Args:
        texts: The list of texts to embed.

    Returns:
        A list with one embedding per entry of the response's "data" field.

    Raises:
        Exception: Whatever the OpenAI API call raises is propagated.
    """
    api_response = openai.Embedding.create(model="text-embedding-ada-002", input=texts)

    # Collect the "embedding" field of every result, in response order
    vectors = []
    for item in api_response["data"]:  # type: ignore
        vectors.append(item["embedding"])
    return vectors

# Pinecone lookup: closest stored lesson for a free-text description
@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=2, max=6))
def fetch_lesson(index, query):
    """
    Embed `query` and ask the Pinecone index for its single nearest match.

    Args:
        index: Object exposing a Pinecone-style `query` method.
        query: Text describing the lesson to look for.

    Returns:
        Whatever `index.query` returns for top_k=1 with metadata included.
        No metadata filter is applied.
    """
    embeddings = get_embeddings([query])
    query_vector = embeddings[0]
    return index.query(vector=query_vector, top_k=1, include_metadata=True)

# OpenAI prompt generation function
@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=2, max=6))
def query_openai(prompt):
    """
    Send `prompt` as the next user turn of the shared conversation and
    return the assistant's reply text.

    Args:
        prompt: The user message to add to the conversation.

    Returns:
        The reply content with surrounding whitespace stripped.

    Side effects:
        On success, appends the user message and the assistant message to
        the module-level `conversation` list.
    """
    user_message = {"role": "user", "content": prompt}

    # The retry decorator re-invokes this whole function when it raises, so
    # the transcript must not be mutated until the request has succeeded;
    # otherwise every failed attempt would leave a duplicate user turn behind.
    response = openai.ChatCompletion.create(
        model=MODEL,
        messages=conversation + [user_message],
        max_tokens=MAX_TOKENS,
        n=1,
        stop=None,
        temperature=0.7
    )
    reply = response.choices[0].message
    reply_text = reply.content.strip()

    # Commit both turns together only once a usable reply is in hand
    conversation.append(user_message)
    conversation.append(reply)
    return reply_text

def _parse_array_candidate(candidate):
    """Parse one bracketed snippet into a list; return None if it cannot be parsed."""
    try:
        # Well-formed JSON needs no repair, and repairing it would corrupt
        # strings that contain escaped double quotes.
        parsed = json.loads(candidate)
    except json.JSONDecodeError:
        # Fix any formatting issues from GPT (e.g. single-quoted strings)
        repaired = candidate.replace('"', "'")
        repaired = re.sub(r"\[\s*'", '["', repaired)
        repaired = re.sub(r"'\s*\]", '"]', repaired)
        repaired = re.sub(r",\s*'", ',"', repaired)
        repaired = repaired.replace("',", '",')
        print(repaired)

        try:
            parsed = json.loads(repaired)
        except json.JSONDecodeError as e:
            print(e)
            return None

    return parsed if isinstance(parsed, list) else None


def extract_arrays(s):
    """
    Pull the first array literal out of a model response.

    The text from the first '[' up to the last ']' is tried first, so that a
    ']' inside an element does not cut the array short. If that span does not
    parse, the span ending at the first ']' after the '[' is tried instead.
    Each span is parsed as strict JSON, then with a quote-repair pass for
    single-quoted output.

    Args:
        s: The raw response text.

    Returns:
        The parsed list, or None when no parseable array is found.
    """
    start = s.find('[')
    if start == -1:
        return None

    first_end = s.find(']', start)
    if first_end == -1:
        return None
    last_end = s.rfind(']')

    # dict.fromkeys de-duplicates while keeping "widest span first" order
    for end in dict.fromkeys((last_end, first_end)):
        arrays = _parse_array_candidate(s[start:end + 1])
        if arrays is not None:
            return arrays
    return None

def generate_curriculum(skills):
    """
    Ask the chat model for a curriculum targeting `skills` and parse the
    array it returns.

    Args:
        skills: The student's target skills, interpolated into the prompt.

    Returns:
        Whatever extract_arrays yields for the model's reply: the parsed
        list, or None if no array could be parsed.
    """
    curriculum_prompt = f"""
    You are a world-class middle and high school educator who develops project-based entrepreneurship curriculum catered to student interests. 
    Create a curriculum of up to 5 lessons for a course based on the student's target skills to learn. 
    Output the curriculum as a javascript array of strings, where each string is a description of the lesson. 
    The output should just be the array and nothing else. 
    Student's target skills: {skills}
    """
    return extract_arrays(query_openai(curriculum_prompt))

def generate_ideas(metadatas, interests):
    """
    Ask the chat model for project ideas that tie the selected lessons to
    the student's interests.

    Args:
        metadatas: Lesson metadata mappings; each must provide 'title',
            'description' and 'outcome'.
        interests: The student's interests, interpolated into the prompt.

    Returns:
        The model's reply text, as returned by query_openai.
    """
    # One bullet per lesson, numbered from 1
    bullet_lines = []
    for i, metadata in enumerate(metadatas):
        bullet_lines.append(f"- Lesson {i + 1}: {metadata['title']}. The description of the lesson is \"{metadata['description']}\". The learning outcomes are \"{metadata['outcome']}\"")
    summary = "\n".join(bullet_lines)

    ideas_prompt = f"""
    We've created a curriculum for the student: 
    {summary} 
    The student has interests in \"{interests}\". 
    What are 5 ambitious projects related to the interests that the student could do after they've learned the skills from this curriculum?
    The output should be a markdown list of the projects and nothing else.
    """
    return query_openai(ideas_prompt)

def generate_application(metadata, interests):
    """
    Ask the chat model for a short description of one lesson.

    The prompt refers to "previously described ambitious projects", so it
    depends on earlier turns already recorded in the shared conversation.

    Args:
        metadata: Lesson metadata mapping; only 'title' is read here.
        interests: Accepted for interface compatibility; not used in the prompt.

    Returns:
        The model's reply text, as returned by query_openai.
    """
    lesson_prompt = f"""
    You are now writing a description for the lesson titled \"{metadata['title']}\". 
    This description comes after the title so don't mention the title explicitly.
    Sound like you're naturally explaining the lesson in person. 
    Describe the lesson and its outcome in one sentence objectively.
    Next, in one separate and concise sentence, explain what the student can now do after learning this lesson.
    Use one of the previously described ambitious projects that's relevant to this lesson.
    Explain the project in full, this is the first time you're talking to the student about it.
    Don't put the project in quotes, don't explicitly use the words "ambitious project", don't say "for example".
    Don't repeat an example of a project if you've used it for a previous lesson. 
    The sentence should implicitly help the student to feel inspired to connect with the lesson. 
    Don't start your sentences the same way compared to your previous responses.
    """
    return query_openai(lesson_prompt)

def format_metadata(metadata, interests, i):
    """
    Render one lesson as markdown: a numbered, linked title followed by a
    generated description.

    Args:
        metadata: Lesson metadata mapping providing 'title' and 'slides'.
        interests: Forwarded to generate_application.
        i: Zero-based position of the lesson; displayed as i + 1.

    Returns:
        The markdown string for this lesson.
    """
    print(metadata)
    blurb = generate_application(metadata, interests)
    lesson_number = i + 1
    heading = f"Lesson {lesson_number}: [{metadata['title']}]({metadata['slides']})"
    return f"{heading}\n\n{blurb}"

def filtered_metadatas(lessons):
    """
    Collect the top match's metadata from each query result, without
    duplicates.

    Args:
        lessons: Query results, each exposing a `matches` sequence whose
            items carry a `metadata` attribute.

    Returns:
        A list of metadata objects. Results with no matches are skipped
        instead of raising IndexError. When the same metadata appears more
        than once, only its last occurrence is kept.
    """
    # A query can come back with zero matches; indexing [0] would crash then
    metadatas = [lesson.matches[0].metadata for lesson in lessons if lesson.matches]

    # Keep an entry only if no equal entry follows it
    return [meta for pos, meta in enumerate(metadatas) if meta not in metadatas[pos + 1:]]

# Streamlit UI: collect the student's skills and interests, then build and
# render a personalised list of lessons when the button is pressed.
st.set_page_config(layout="centered")
st.title("Discover Prequel")
st.markdown("Connecting learning to real-world experiences and personal passions makes education more engaging, meaningful, and applicable.\n\nUse this tool to generate a list of [Prequel](https://joinprequel.com/)'s life skill workshops relevant to each student's unique interests and goals.")
skills = st.text_input("""What life skills would the student like to learn? List up to 3.

_Examples: public speaking, business, time management, technology_""")
interests = st.text_input("""What are some interests or goals the student has? List up to 3.

_Examples: animals, video games, grow on social media_""")
submit_button = st.button("Generate curriculum")
status = st.empty()  # placeholder reused for progress and error messages

if submit_button:
    status.text("Whipping up a lesson plan as unique as you... this might take a minute")
    curriculum = generate_curriculum(skills)

    lessons_metadata = []
    # An empty curriculum is as unusable as an unparseable one (None)
    if curriculum:
        status.text("Feeding the hamsters powering our servers...")
        index = load_pinecone_index()
        lessons = [fetch_lesson(index, lesson) for lesson in curriculum]
        lessons_metadata = filtered_metadatas(lessons)

    if lessons_metadata:
        status.text("Harvesting the seeds of wisdom...")
        # The returned text is not displayed; the call records the project
        # ideas in the shared conversation, which the per-lesson prompts in
        # generate_application refer back to.
        generate_ideas(lessons_metadata, interests)

        status.text("Building a bridge from your dreams to reality... just one more moment!")
        lesson_text = "\n\n".join([format_metadata(metadata, interests, i) for i, metadata in enumerate(lessons_metadata)])

        status.empty()
        st.markdown(f"""
        \n\n## Your personalized learning playlist\n\n{lesson_text}
        """)
    else:
        # No curriculum or no matching lessons: invite the user to retry
        status.text("The Wheel of Education spun out of control! Care to give it another whirl? Click 'Generate curriculum' again")