# Movie Analysis with RAG and Groq — Streamlit app
# (Removed Hugging Face Spaces page residue that was pasted into this file.)
import os

import pandas as pd
import streamlit as st
from groq import Groq
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
# --- Groq client and embedding model setup ---

# SECURITY FIX: the original file hard-coded a Groq API key in source.
# A key committed to a public Space is leaked and must be rotated;
# read it from the environment (or Streamlit secrets) instead.
api_key = os.environ.get("GROQ_API_KEY", "")
client = Groq(api_key=api_key)

# Sentence-transformer that embeds movie overviews and user queries into
# the same vector space for cosine-similarity retrieval.
embedder = SentenceTransformer('paraphrase-MiniLM-L6-v2')
# --- Streamlit page layout ---
st.title("Movie Analysis with RAG and Groq")
st.write("Enter a query to get a summary-based analysis of movies.")

# Header image. FIX: the original URL used "/blob/main/", which returns the
# Hugging Face HTML file-viewer page, not the image bytes, so st.image could
# not render it; "/resolve/main/" serves the raw file.
image_url = 'https://huggingface.co/spaces/Izza-shahzad-13/Movieanalysis/resolve/main/movie.jpg'
st.image(image_url, caption='Movie Analysis Dashboard', use_column_width=True)
# --- Dataset upload and overview embedding ---
uploaded_file = st.file_uploader("Upload your movie dataset CSV file", type="csv")

# Both stay None until a valid CSV (with an 'overview' column) is uploaded;
# downstream code checks `embeddings is not None` before searching.
df = None
embeddings = None

if uploaded_file is not None:
    df = pd.read_csv(uploaded_file)
    if 'overview' not in df.columns:
        st.error("The uploaded CSV does not contain an 'overview' column.")
    else:
        # Embed every overview once up front; NaN overviews become "".
        embeddings = embedder.encode(df['overview'].fillna("").values)
# --- Retrieval and generation helpers ---
def retrieve_movies_for_summary(query, top_n=5):
    """Return the top_n rows of `df` whose overviews best match `query`.

    Computes cosine similarity between the query embedding and the
    precomputed overview embeddings. Returns an empty DataFrame when no
    dataset has been uploaded yet (module-level `embeddings` is None).
    """
    if embeddings is None:
        # Nothing to search against yet.
        return pd.DataFrame()
    query_vec = embedder.encode([query])
    scores = cosine_similarity(query_vec, embeddings)[0]
    # Indices of the highest-scoring overviews, best first.
    best = scores.argsort()[-top_n:][::-1]
    return df.iloc[best]
def generate_summary_response(query):
    """Generate an LLM answer for `query`, grounded in retrieved movies.

    Retrieves the most similar movies, formats their title/overview/genres
    into a context block, and asks the Groq chat model. Returns a plain
    string: either the model's answer or a "no results" message.
    """
    relevant_movies = retrieve_movies_for_summary(query)
    if relevant_movies.empty:
        return "No relevant movies found for the given query."

    # Compile a per-movie context summary for the retrieved rows.
    # NOTE(review): assumes the CSV also has 'title' and 'genres' columns —
    # only 'overview' is validated at upload time.
    movie_context = "\n".join(
        f"Title: {row['title']}\nOverview: {row['overview']}\nGenres: {row['genres']}\n"
        for _, row in relevant_movies.iterrows()
    )

    # FIX: the system message (retrieval context) now precedes the user
    # message. Chat models treat the system role as framing for the whole
    # conversation; the original order appended it after the user query.
    chat_completion = client.chat.completions.create(
        messages=[
            {"role": "system", "content": f"Context Summary: {movie_context}"},
            {"role": "user", "content": query},
        ],
        model="llama3-8b-8192",
    )
    return chat_completion.choices[0].message.content
# --- Query input and summary generation UI ---
query = st.text_input("Enter your query:")

if st.button("Generate Summary"):
    if query and embeddings is not None:
        # Both a query and an uploaded dataset are available — run the RAG flow.
        with st.spinner("Generating summary..."):
            answer = generate_summary_response(query)
            st.write("### Summary Response")
            st.write(answer)
    else:
        # Tell the user exactly which prerequisite is missing (possibly both).
        if not query:
            st.warning("Please enter a query to generate a summary.")
        if embeddings is None:
            st.warning("Please upload a CSV file first.")