File size: 3,406 Bytes
457660f
 
 
 
 
 
 
604c47c
9f0e544
457660f
 
 
 
db0fe6d
 
 
 
4a5fae2
50aa180
3f11ffb
db0fe6d
 
457660f
506adc7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
457660f
 
506adc7
 
 
 
 
 
 
 
457660f
 
 
 
83a5da3
 
 
 
 
457660f
 
 
 
 
 
 
 
9f0e544
 
457660f
 
 
 
 
 
db0fe6d
457660f
 
506adc7
83a5da3
 
 
 
 
 
604c47c
 
506adc7
604c47c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import os

import pandas as pd
import streamlit as st
from groq import Groq
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# ---------------------------------------------------------------------------
# App setup: Groq client, embedding model, page layout, and dataset upload.
# ---------------------------------------------------------------------------

# SECURITY: never hard-code API keys in source — the previous literal key was
# exposed in this file and should be revoked. Read it from the environment
# (or configure it via Streamlit secrets) instead.
api_key = os.environ.get("GROQ_API_KEY", "")
client = Groq(api_key=api_key)

# Sentence-embedding model used to vectorize both the movie overviews and the
# user's query for cosine-similarity retrieval.
embedder = SentenceTransformer('paraphrase-MiniLM-L6-v2')

# Streamlit page header
st.title("Movie Analysis with RAG and Groq")
st.write("Enter a query to get a summary-based analysis of movies.")

# Header image. NOTE: the Hugging Face '/blob/' URL returns an HTML viewer
# page, not the raw image — '/resolve/' serves the actual file.
image_url = 'https://huggingface.co/spaces/Izza-shahzad-13/Movieanalysis/resolve/main/movie.jpg'
st.image(image_url, caption='Movie Analysis Dashboard', use_column_width=True)

# Dataset upload: a CSV expected to contain (at least) an 'overview' column.
uploaded_file = st.file_uploader("Upload your movie dataset CSV file", type="csv")

# Module-level state consumed by the retrieval functions below.
df = None          # uploaded movie DataFrame (None until a CSV is provided)
embeddings = None  # overview embeddings, row-aligned with df

# Only process the dataset once a file has actually been uploaded.
if uploaded_file is not None:
    df = pd.read_csv(uploaded_file)

    if 'overview' in df.columns:
        # Encode empty overviews as "" so every row keeps an embedding
        # aligned with its positional index in df.
        embeddings = embedder.encode(df['overview'].fillna("").values)
    else:
        st.error("The uploaded CSV does not contain an 'overview' column.")

# Define functions for movie retrieval and summary generation
def retrieve_movies_for_summary(query, top_n=5):
    """Return the ``top_n`` movies whose overviews best match *query*.

    Uses cosine similarity between the query embedding and the precomputed
    overview embeddings. Returns an empty DataFrame when no dataset has been
    uploaded yet (module-level ``embeddings`` is still None).
    """
    if embeddings is None:
        # Nothing to search against until a CSV has been uploaded.
        return pd.DataFrame()

    # Embed the query and rank every overview by similarity, best first.
    query_vec = embedder.encode([query])
    scores = cosine_similarity(query_vec, embeddings)[0]
    best_indices = scores.argsort()[::-1][:top_n]
    return df.iloc[best_indices]

def generate_summary_response(query):
    """Generate an LLM answer for *query*, grounded in retrieved movies.

    Retrieves the most similar movies, builds a textual context block from
    their title/overview/genres, and asks the Groq chat model to respond
    using that context.

    Returns:
        str: the model's response, or a fallback message when no movies
        match (or no dataset has been uploaded).
    """
    # Retrieve relevant movies for the query.
    relevant_movies = retrieve_movies_for_summary(query)

    if relevant_movies.empty:
        return "No relevant movies found for the given query."

    # Compile a context summary per retrieved movie. Use Series.get() with a
    # fallback: only the 'overview' column is validated at upload time, so
    # indexing row['title'] / row['genres'] directly could raise KeyError.
    movie_context = "\n".join(
        f"Title: {row.get('title', 'N/A')}\n"
        f"Overview: {row.get('overview', 'N/A')}\n"
        f"Genres: {row.get('genres', 'N/A')}\n"
        for _, row in relevant_movies.iterrows()
    )

    # System message first (conventional chat ordering), then the user query.
    chat_completion = client.chat.completions.create(
        messages=[
            {"role": "system", "content": f"Context Summary: {movie_context}"},
            {"role": "user", "content": query},
        ],
        model="llama3-8b-8192",
    )

    return chat_completion.choices[0].message.content

# --- User input and response rendering -------------------------------------
query = st.text_input("Enter your query:")

if st.button("Generate Summary"):
    # Both a non-empty query and a processed dataset are required.
    ready = bool(query) and embeddings is not None
    if ready:
        with st.spinner("Generating summary..."):
            summary_response = generate_summary_response(query)
        st.write("### Summary Response")
        st.write(summary_response)
    else:
        # Surface every missing precondition, not just the first one.
        if not query:
            st.warning("Please enter a query to generate a summary.")
        if embeddings is None:
            st.warning("Please upload a CSV file first.")