"""Streamlit app: RAG-based movie analysis.

Uploads a movie CSV, embeds each movie's 'overview' with a sentence
transformer, retrieves the rows most similar to a user query, and asks a
Groq-hosted LLM to produce a summary grounded in that retrieved context.
"""

import os

import pandas as pd
import streamlit as st
from groq import Groq
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# SECURITY FIX: the original committed a live API key in source. Read it from
# the environment or Streamlit secrets instead; never hard-code credentials.
api_key = os.environ.get("GROQ_API_KEY") or st.secrets.get("GROQ_API_KEY", "")
if not api_key:
    st.error("GROQ_API_KEY is not set. Add it to your environment or Streamlit secrets.")
    st.stop()
client = Groq(api_key=api_key)


@st.cache_resource
def _load_embedder():
    """Load the sentence-embedding model once and cache it across reruns.

    Without caching, Streamlit re-instantiates (and may re-download) the
    model on every widget interaction.
    """
    return SentenceTransformer('paraphrase-MiniLM-L6-v2')


embedder = _load_embedder()

# --- Page layout ---
st.title("Movie Analysis with RAG and Groq")
st.write("Enter a query to get a summary-based analysis of movies.")

# BUG FIX: the original URL used the HuggingFace 'blob/' path, which serves an
# HTML page, not an image. 'resolve/' serves the raw file that st.image needs.
image_url = 'https://huggingface.co/spaces/Izza-shahzad-13/Movieanalysis/resolve/main/movie.jpg'
st.image(image_url, caption='Movie Analysis Dashboard', use_column_width=True)

# --- Dataset ingest ---
uploaded_file = st.file_uploader("Upload your movie dataset CSV file", type="csv")

df = None          # the uploaded movie table; must contain an 'overview' column
embeddings = None  # row-aligned embedding matrix for df['overview']

if uploaded_file is not None:
    df = pd.read_csv(uploaded_file)
    if 'overview' in df.columns:
        # fillna("") so rows with a missing overview embed as empty strings
        # instead of crashing the encoder on NaN.
        embeddings = embedder.encode(df['overview'].fillna("").values)
    else:
        st.error("The uploaded CSV does not contain an 'overview' column.")


def retrieve_movies_for_summary(query, top_n=5):
    """Return the ``top_n`` movies most similar to ``query``.

    Similarity is cosine similarity between the query embedding and the
    precomputed overview embeddings. Returns an empty DataFrame when no
    dataset has been uploaded (``embeddings`` is None).
    """
    if embeddings is None:
        return pd.DataFrame()
    query_embedding = embedder.encode([query])
    similarities = cosine_similarity(query_embedding, embeddings)
    # argsort is ascending; take the last top_n and reverse for best-first order.
    indices = similarities[0].argsort()[-top_n:][::-1]
    return df.iloc[indices]


def generate_summary_response(query):
    """Retrieve relevant movies and ask the Groq LLM for a grounded summary.

    Returns the model's reply, a "no relevant movies" message when retrieval
    is empty, or an error string if the API call fails.
    """
    relevant_movies = retrieve_movies_for_summary(query)
    if relevant_movies.empty:
        return "No relevant movies found for the given query."

    # Flatten the retrieved rows into a plain-text context block for the LLM.
    movie_context = "\n".join(
        f"Title: {row['title']}\nOverview: {row['overview']}\nGenres: {row['genres']}\n"
        for _, row in relevant_movies.iterrows()
    )

    try:
        # FIX: send the system/context message before the user message so the
        # model treats the retrieved movies as grounding context, not as a reply.
        chat_completion = client.chat.completions.create(
            messages=[
                {"role": "system", "content": f"Context Summary: {movie_context}"},
                {"role": "user", "content": query},
            ],
            model="llama3-8b-8192",
        )
    except Exception as exc:
        # Surface API/network failures in the UI instead of crashing the app.
        return f"Error calling the Groq API: {exc}"
    return chat_completion.choices[0].message.content


# --- Query UI ---
query = st.text_input("Enter your query:")

if st.button("Generate Summary"):
    if query and embeddings is not None:
        with st.spinner("Generating summary..."):
            summary_response = generate_summary_response(query)
        st.write("### Summary Response")
        st.write(summary_response)
    else:
        if not query:
            st.warning("Please enter a query to generate a summary.")
        if embeddings is None:
            st.warning("Please upload a CSV file first.")