# film_buff / app.py
# Last commit by hightowerr: "Update app.py" (2b975c5, verified)
import os

import lancedb
import pandas as pd
import streamlit as st
from dotenv import load_dotenv
from langchain.chains import RetrievalQA
from langchain_community.vectorstores import LanceDB
from langchain_core.documents import Document
from langchain_core.prompts import PromptTemplate
from langchain_openai import OpenAI
from langchain_openai import OpenAIEmbeddings
# Page configuration
st.set_page_config(page_title="Film Buff", page_icon="🎬", layout="wide")
st.header('Film Buff 🎬 - Share your favorite movie and get personalized recommendations', divider=True)

# Load environment variables (load_dotenv also picks up a local .env file).
load_dotenv()

# Fail with a readable message instead of a raw KeyError traceback when the
# key is missing or empty; the OpenAI clients below read it from the environment.
openai_api_key = os.environ.get('OPENAI_API_KEY')
if not openai_api_key:
    st.error(
        "OPENAI_API_KEY is not set. Add it to the environment or to a .env "
        "file and reload the app."
    )
    st.stop()

# Embedding model used by the vector store to embed user queries.
embeddings = OpenAIEmbeddings()
# Load the movie dataset and build the vector store once per server process.
@st.cache_resource
def _build_movie_store():
    """Load the movie dataset and (re)build the LanceDB search table.

    Streamlit re-executes this script on every widget interaction. Caching
    this function keeps the pickle load and the drop/create of the
    ``movies_search`` table from being repeated on each rerun.

    Returns:
        tuple: ``(md, db, table, docsearch)`` -- the movie DataFrame, the
        LanceDB connection, the freshly created ``movies_search`` table and
        the LangChain vector store wrapping that table.
    """
    # NOTE: unpickling can execute arbitrary code; movies.pkl must come from
    # a trusted source.
    movies = pd.read_pickle('movies.pkl')

    # Initialize LanceDB connection
    connection = lancedb.connect("data/sample-lancedb")

    # Keep the raw "movies" table around, creating it only if it is missing.
    if "movies" not in connection.table_names():
        connection.create_table("movies", movies)

    # Build a DataFrame with the schema expected by the search table.
    search_df = pd.DataFrame({
        'Genres': movies['genres'],
        'title': movies['title'],
        'Overview': movies['overview'],
        'Rating': movies['weighted_rate'],
        'text': movies['text'],
        'adult': movies['adult'],
        'vector': movies['vector'],  # The pre-computed embeddings
        'metadata': [
            {'title': title, 'genres': genres}
            for title, genres in zip(movies['title'], movies['genres'])
        ],
    })

    # Recreate the search table so that it always matches the schema above.
    if "movies_search" in connection.table_names():
        connection.drop_table("movies_search")
    search_table = connection.create_table("movies_search", data=search_df)

    # Create the vector store
    store = LanceDB(
        connection=connection,
        embedding=embeddings,
        table_name="movies_search",
    )
    return movies, connection, search_table, store


# The cached objects are shared across reruns and sessions: treat ``md`` as
# read-only (derive new frames from it rather than mutating it in place).
md, db, table, docsearch = _build_movie_store()
# Sidebar: collect the user's profile and favourite genre.
sidebar = st.sidebar
sidebar.title("Film Buff Recommendation System")
sidebar.markdown("Please enter your details and preferences below:")

age = sidebar.slider("What is your age?", 1, 100, 25)
gender = sidebar.radio("What is your gender?", ["Male", "Female", "Other"])

# One row per (movie, genre) pair, then the distinct genre names in sorted order.
exploded_genres = md.explode('genres')["genres"]
unique_genres = sorted(exploded_genres.unique())
genre = sidebar.selectbox("What is your favourite film genre?", unique_genres)


def _has_selected_genre(movie_genres):
    """Return True when the selected genre is among a movie's genres."""
    return genre in movie_genres


# Keep only the movies whose genres include the selected one.
genre_mask = md['genres'].apply(_has_selected_genre)
df_filtered = md[genre_mask]
# Prompt template for recommendation system.
# {context} and {question} are filled in by the "stuff" QA chain at query
# time; {age} and {gender} are bound below from the sidebar inputs.
PROMPT_TEMPLATE = """
You are a film recommender system helping users find films matching their preferences.
Context: {context}
User Profile:
Age: {age}
Gender: {gender}
Question: {question}
Your response:"""

# RetrievalQA only reads the "query" input, so the user profile has to be part
# of the prompt itself: bind it here and hand the prompt to the chain.
recommendation_prompt = PromptTemplate.from_template(PROMPT_TEMPLATE).partial(
    age=str(age),
    gender=gender,
)

# Initialize QA chain
qa = RetrievalQA.from_chain_type(
    llm=OpenAI(),
    chain_type="stuff",
    # NOTE(review): 'data' does not look like a search option recognised by the
    # LanceDB vector store, so the genre-filtered frame probably has no effect
    # on retrieval -- confirm, and consider a `filter` expression instead.
    retriever=docsearch.as_retriever(search_kwargs={'data': df_filtered}),
    chain_type_kwargs={"prompt": recommendation_prompt},
    return_source_documents=True,
)

# User query interface; surrounding whitespace is stripped so that a blank
# entry is treated the same as no entry.
query = st.text_input(
    'Enter your question:',
    placeholder='What action films do you suggest?'
).strip()

# Add a search button
search_button = st.button('Get Recommendations 🎬')

# Only process the query when the button is clicked
if search_button:
    if not query:
        # Button pressed without a question
        st.warning('Please enter a film-related question first!')
    else:
        with st.spinner('Searching for the perfect film...'):
            result = qa.invoke({"query": query})
        st.success('Recommendations found!')
        st.write(result['result'])