import json
import logging
import re
from typing import List, Optional, TypedDict

import openai
import pinecone
import requests
import streamlit as st

logger = logging.getLogger(__name__)

PINECONE_API_KEY = st.secrets["PINECONE_API_KEY"]

# Set OpenAI API key from Streamlit Secrets
OPENAI_API_KEY = st.secrets["OPENAI_API_KEY"]


class Metadata(TypedDict):
    """Metadata record attached to each workshop vector in the index."""

    title: str  # rendered as the link text by format_metadata
    description: str  # rendered under the title by format_metadata
    slides: str  # used as the link target by format_metadata
    outcome: str  # not read anywhere in this script


# Initialize OpenAI
openai.api_key = OPENAI_API_KEY


@st.cache_resource
def load_pinecone_index():
    """
    Initialise the Pinecone client and return the workshops index handle.

    Decorated with ``st.cache_resource`` so the handle is shared across
    Streamlit reruns instead of being re-created on every interaction.

    Returns:
        A ``pinecone.Index`` bound to the "prequelworkshops" index.
    """
    pinecone.init(api_key=PINECONE_API_KEY, environment="us-central1-gcp")
    index_name = "prequelworkshops"
    return pinecone.Index(index_name)


def get_embeddings(texts: List[str]) -> List[List[float]]:
    """
    Embed texts using OpenAI's ada model.

    Args:
        texts: The list of texts to embed.

    Returns:
        A list of embeddings, each of which is a list of floats.

    Raises:
        Exception: If the OpenAI API call fails.
    """
    # Call the OpenAI API to get the embeddings
    response = openai.Embedding.create(input=texts, model="text-embedding-ada-002")

    # Extract the embedding data from the response
    data = response["data"]  # type: ignore

    # Return the embeddings as a list of lists of floats
    return [result["embedding"] for result in data]


# Pinecone fetch function
def fetch_workshops(index, query: str) -> List[Metadata]:
    """
    Return the metadata of the workshops most similar to ``query``.

    Args:
        index: The Pinecone index handle to query.
        query: Free-text description of the topics being searched for.

    Returns:
        The metadata of up to 10 matches, in the order the index returns them.
    """
    vector = get_embeddings([query])[0]
    response = index.query(
        vector=vector,
        # Example metadata filter (currently disabled):
        # filter={
        #     "genre": {"$eq": "documentary"},
        #     "year": 2019
        # },
        top_k=10,
        include_metadata=True,
    )
    return [match.metadata for match in response.matches]


def format_metadata(metadata: Metadata) -> str:
    """
    Render one workshop as a Markdown snippet.

    Args:
        metadata: The workshop record to render.

    Returns:
        A Markdown string with the title linked to the slides, followed by
        the description.
    """
    return f"Title: [{metadata['title']}]({metadata['slides']})\n\nDescription: {metadata['description']}"


# Streamlit UI
st.set_page_config(layout="centered")
st.title("Search Prequel Workshops")

query = st.text_area("What topics are you looking for workshops about?", height=100)
submit_button = st.button("Search")
status = st.empty()

if submit_button:
    if not query.strip():
        # A blank query cannot be embedded meaningfully; ask for input rather
        # than spending an API call that ends in a generic error.
        status.text("Please enter a topic to search for.")
    else:
        try:
            status.text("Fetching relevant workshops...")
            index = load_pinecone_index()
            workshops = fetch_workshops(index, query)
            workshop_text = "\n\n".join(
                format_metadata(metadata) for metadata in workshops
            )
            status.empty()
            st.markdown(f"**Generated Curriculum:**\n\n{workshop_text}")
        except Exception:
            # Catch Exception rather than using a bare except so that
            # SystemExit/KeyboardInterrupt still propagate, and log the
            # traceback so the failure can be diagnosed.
            logger.exception("Workshop search failed")
            status.text("Error searching. Please try again")