# Owen Wang
# reworked
# c3e440a
from typing import List, Optional, TypedDict
import streamlit as st
import requests
import openai
import pinecone
import json
import re
# Pinecone API key, read from Streamlit Secrets
PINECONE_API_KEY = st.secrets["PINECONE_API_KEY"]
# OpenAI API key, read from Streamlit Secrets
OPENAI_API_KEY = st.secrets["OPENAI_API_KEY"]
class Metadata(TypedDict, total=True):
    """Shape of the per-workshop metadata dict consumed by format_metadata.

    All four keys are required and hold plain strings.
    """

    # Workshop title; rendered as the link text.
    title: str
    # Free-text summary of the workshop.
    description: str
    # Used as the Markdown link target for the title.
    slides: str
    # Not read anywhere in this file; presumably the workshop's intended
    # outcome -- TODO confirm against the index contents.
    outcome: str
# Initialize OpenAI: hand the key loaded from Streamlit Secrets to the
# openai module so later API calls are authenticated.
openai.api_key = OPENAI_API_KEY
@st.cache_resource
def load_pinecone_index():
    """
    Initialise the Pinecone client and return the workshops index.

    Decorated with ``st.cache_resource`` so Streamlit can reuse the returned
    index object instead of re-initialising Pinecone on every script rerun.

    Returns:
        The ``pinecone.Index`` handle for the "prequelworkshops" index.
    """
    pinecone.init(environment="us-central1-gcp", api_key=PINECONE_API_KEY)
    return pinecone.Index("prequelworkshops")
def get_embeddings(texts: List[str]) -> List[List[float]]:
    """
    Embed texts using OpenAI's ada model.

    Args:
        texts: The list of texts to embed. An empty list yields an empty
            result without contacting the API.

    Returns:
        A list of embeddings, each of which is a list of floats, one per
        entry in the API response's ``data`` list.

    Raises:
        Exception: If the OpenAI API call fails.
    """
    # Nothing to embed: skip the request, which the API would reject anyway.
    if not texts:
        return []

    # Call the OpenAI API to get the embeddings
    response = openai.Embedding.create(input=texts, model="text-embedding-ada-002")

    # Extract the embedding data from the response
    data = response["data"]  # type: ignore

    # Return the embeddings as a list of lists of floats
    return [result["embedding"] for result in data]
# Pinecone fetch function
def fetch_workshops(index, query: str, top_k: int = 10) -> List[dict]:
    """
    Return the metadata of the workshops most similar to a text query.

    The query is embedded with ``get_embeddings`` and the resulting vector is
    used for a similarity search against ``index``.

    Args:
        index: Object exposing a Pinecone-style ``query`` method, e.g. the
            value returned by ``load_pinecone_index``.
        query: Free-text description of the topics to search for.
        top_k: Maximum number of matches to request. Defaults to 10, the
            value that was previously hard-coded.

    Returns:
        The ``metadata`` of every match, in the order the index returned them.

    Raises:
        Exception: If embedding the query or querying the index fails.
    """
    query_vector = get_embeddings([query])[0]
    response = index.query(
        vector=query_vector,
        top_k=top_k,
        include_metadata=True,
    )
    return [match.metadata for match in response.matches]
def format_metadata(metadata: Metadata) -> str:
    """
    Render one workshop's metadata as a Markdown snippet.

    Args:
        metadata: Workshop metadata; the ``title``, ``slides`` and
            ``description`` keys are read.

    Returns:
        A single Markdown string: the title as a link to the slides,
        a blank line, then the description.

    Raises:
        KeyError: If any of the keys read above is missing.
    """
    return f"Title: [{metadata['title']}]({metadata['slides']})\n\nDescription: {metadata['description']}"
# Streamlit UI
# Page layout: a query box, a search button and a placeholder used for
# transient status messages.
st.set_page_config(layout="centered")
st.title("Search Prequel Workshops")
query = st.text_area("What topics are you looking for workshops about?", height=100)
submit_button = st.button("Search")
status = st.empty()

if submit_button:
    if not query.strip():
        # A blank query cannot be meaningfully embedded; tell the user
        # instead of making API calls that would only end in a generic error.
        status.text("Please enter a topic to search for.")
    else:
        try:
            status.text("Fetching relevant workshops...")
            index = load_pinecone_index()
            workshops = fetch_workshops(index, query)
            workshop_text = "\n\n".join(
                format_metadata(metadata) for metadata in workshops
            )
            # Clear the progress message before showing the results.
            status.empty()
            st.markdown(f"**Generated Curriculum:**\n\n{workshop_text}")
        except Exception:
            # Catch Exception rather than using a bare except so that
            # BaseException subclasses (KeyboardInterrupt, SystemExit) still
            # propagate instead of being reported as a search error.
            status.text("Error searching. Please try again")