File size: 2,473 Bytes
b953c09
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c3e440a
b953c09
 
c3e440a
b953c09
 
 
 
 
c3e440a
b953c09
 
 
c3e440a
b953c09
c3e440a
 
b953c09
 
 
c3e440a
 
 
b953c09
 
 
c3e440a
 
b953c09
c3e440a
 
b953c09
c3e440a
 
 
b953c09
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import json
import logging
import re
from typing import List, Optional, TypedDict

import openai
import pinecone
import requests
import streamlit as st

# Pinecone API key, read from Streamlit Secrets
PINECONE_API_KEY = st.secrets["PINECONE_API_KEY"]
# OpenAI API key, read from Streamlit Secrets
OPENAI_API_KEY = st.secrets["OPENAI_API_KEY"]

class Metadata(TypedDict):
    """Typed shape of the metadata record describing one workshop."""

    title: str  # rendered as the link text by format_metadata
    description: str  # rendered after the "Description:" label by format_metadata
    slides: str  # used as the Markdown link target by format_metadata (presumably a URL)
    outcome: str  # not read anywhere in this file

# Initialize OpenAI: set the module-level API key on the openai package
openai.api_key = OPENAI_API_KEY

@st.cache_resource
def load_pinecone_index():
    """
    Initialise the Pinecone client and return the workshop index handle.

    Wrapped in ``st.cache_resource`` so the handle is cached by Streamlit
    rather than rebuilt on every call.

    Returns:
        The ``pinecone.Index`` object for the "prequelworkshops" index.
    """
    pinecone.init(environment="us-central1-gcp", api_key=PINECONE_API_KEY)
    return pinecone.Index("prequelworkshops")

def get_embeddings(texts: List[str]) -> List[List[float]]:
    """
    Embed texts using OpenAI's ada model.

    Args:
        texts: The list of texts to embed. An empty list is accepted and
            yields an empty result without contacting the API.

    Returns:
        A list of embeddings, each of which is a list of floats, in the
        order the API returned them.

    Raises:
        Exception: If the OpenAI API call fails.
    """
    # Nothing to embed: skip the network round trip, which could only fail.
    if not texts:
        return []

    # Call the OpenAI API to get the embeddings
    response = openai.Embedding.create(input=texts, model="text-embedding-ada-002")

    # Each entry of response["data"] carries its vector under "embedding".
    return [item["embedding"] for item in response["data"]]  # type: ignore

# Pinecone fetch function
def fetch_workshops(index, query: str, top_k: int = 10) -> List[dict]:
    """
    Return the metadata of the workshops most similar to ``query``.

    The query is embedded with ``get_embeddings`` and the resulting vector
    is used for a similarity search on ``index``.

    Args:
        index: Index handle exposing a ``query(vector=..., top_k=...,
            include_metadata=...)`` method, as returned by
            ``load_pinecone_index``.
        query: Free-text search phrase.
        top_k: Maximum number of matches to request from the index.
            Defaults to 10, the value that was previously hard-coded.

    Returns:
        The ``metadata`` attribute of every match, in the order the index
        returned them. A blank query returns an empty list without calling
        the embedding API or the index.
    """
    # A blank query has nothing to embed, so report "no matches" directly.
    if not query or not query.strip():
        return []

    vector = get_embeddings([query])[0]

    response = index.query(
        vector=vector,
        top_k=top_k,
        include_metadata=True,
    )

    return [match.metadata for match in response.matches]

def format_metadata(metadata: Metadata) -> str:
    """
    Render one workshop's metadata as a Markdown snippet.

    Args:
        metadata: Workshop record. ``title`` is required; ``slides`` and
            ``description`` are read if present.

    Returns:
        A string of the form ``Title: [<title>](<slides>)`` followed by a
        blank line and ``Description: <description>``. When ``slides`` is
        missing or empty the title is rendered as plain text instead of a
        link with no target.

    Raises:
        KeyError: If ``metadata`` has no ``title`` entry.
    """
    title = metadata["title"]
    slides = metadata.get("slides")
    # Only build a Markdown link when there is a target to point at.
    heading = f"[{title}]({slides})" if slides else f"{title}"
    description = metadata.get("description", "")
    return f"Title: {heading}\n\nDescription: {description}"

# Streamlit UI
st.set_page_config(layout="centered")
st.title("Search Prequel Workshops")
query = st.text_area("What topics are you looking for workshops about?", height=100)
submit_button = st.button("Search")
# Single placeholder reused for progress, hint and error messages.
status = st.empty()

if submit_button:
    if not query.strip():
        # Nothing to search for: ask for input instead of calling the APIs.
        status.text("Please enter a topic to search for.")
    else:
        try:
            status.text("Fetching relevant workshops...")
            index = load_pinecone_index()
            workshops = fetch_workshops(index, query)
            workshop_text = "\n\n".join(format_metadata(metadata) for metadata in workshops)
        except Exception:
            # Top-level boundary: keep the friendly message for the user, but
            # record the traceback so failures can be diagnosed. Catching
            # Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            logging.getLogger(__name__).exception("Workshop search failed")
            status.text("Error searching. Please try again")
        else:
            if workshops:
                status.empty()
                st.markdown(f"**Generated Curriculum:**\n\n{workshop_text}")
            else:
                status.text("No matching workshops found.")