File size: 8,355 Bytes
3056267
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
import streamlit as st
import pandas as pd
import os
from dotenv import load_dotenv
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langchain_community.vectorstores import FAISS

# Load environment variables
# Load environment variables from a local .env file (API keys, LangSmith config).
load_dotenv()

# LangSmith observability settings.
# NOTE(review): these module-level bindings are read here but never referenced
# elsewhere in this file -- LangChain picks the LANGSMITH_* variables up from
# the process environment directly, so these appear to exist only for
# documentation/debugging purposes. Verify before removing.
LANGSMITH_TRACING = os.getenv("LANGSMITH_TRACING")
LANGSMITH_API_KEY = os.getenv("LANGSMITH_API_KEY")
LANGSMITH_ENDPOINT = os.getenv("LANGSMITH_ENDPOINT") 
LANGSMITH_PROJECT = os.getenv("LANGSMITH_PROJECT")
class CourseSearchSystem:
    """RAG-based course finder.

    Embeds course rows into a FAISS index, retrieves the chunks most similar
    to a user query, and asks a Gemini chat model to produce an advisory
    analysis of the retrieved courses.
    """

    def __init__(self):
        """Initialize the Gemini chat model and the embedding model.

        Requires GOOGLE_API_KEY in the environment. The key is passed
        explicitly to BOTH models so configuration lives in one place
        (previously only the chat model received it and the embedding
        model relied on the env var implicitly).
        """
        api_key = os.getenv('GOOGLE_API_KEY')

        # Generation model: low temperature for consistent, focused analysis.
        self.generation_model = ChatGoogleGenerativeAI(
            model="gemini-1.5-pro",
            convert_system_message_to_human=True,  # Gemini has no system role
            google_api_key=api_key,
            temperature=0.1,        # Lower temperature for more consistent outputs
            top_p=0.8,              # Focused nucleus sampling
            top_k=40,               # Standard top_k value
            max_output_tokens=2048  # Enough room for a detailed analysis
        )

        # Embedding model used to build and query the FAISS index.
        self.embeddings = GoogleGenerativeAIEmbeddings(
            model="models/embedding-001",
            google_api_key=api_key,
        )
        self.vector_store = None  # FAISS index; built lazily by create_vector_store
        self.course_data = []     # parallel metadata dicts, one per indexed course

    def process_course(self, row):
        """Format one course row (a mapping with the CSV's columns) into the
        text chunk that gets embedded and searched.

        The TITLE line is also used by search_courses() to map a retrieved
        chunk back to its metadata dict.
        """
        return f"""
        TITLE: {row['Title']}
        BRIEF: {row['Brief']}
        LEVEL: {row['Level']}
        DURATION: {row['Duration']}
        DESCRIPTION: {row['Description']}
        URL: {row['Link']}
        CURRICULUM: {row['Curriculum']}
        TARGET AUDIENCE AND BENEFITS: {row['What should enroll & takeaway']}
        """

    def create_vector_store(self, df):
        """Build the FAISS index and metadata list from a DataFrame of courses.

        Resets course_data first: with st.cache_resource the instance outlives
        a browser session while st.session_state does not, so without the reset
        a new session would re-index and duplicate every course's metadata.

        Raises: re-raises any indexing/embedding error after surfacing it in
        the Streamlit UI.
        """
        try:
            self.course_data = []  # guard against duplicate accumulation
            texts = []
            for _, row in df.iterrows():
                texts.append(self.process_course(row))
                self.course_data.append({
                    'title': row['Title'],
                    'brief': row['Brief'],
                    'level': row['Level'],
                    'duration': row['Duration'],
                    'url': row['Link'],
                    'curriculum': row['Curriculum'],
                    'target_audience': row['What should enroll & takeaway']
                })

            # Embed all course chunks and build the similarity index.
            self.vector_store = FAISS.from_texts(texts, self.embeddings)
        except Exception as e:
            st.error(f"Error creating vector store: {str(e)}")
            raise

    def search_courses(self, query, k=3):
        """Retrieve up to *k* courses relevant to *query* and generate an analysis.

        Returns a (response_text, courses) tuple where *courses* is a list of
        metadata dicts (possibly empty). Errors are reported in the UI and
        returned as the response text rather than raised, so the app keeps
        running.
        """
        try:
            # Explicit None check: don't rely on truthiness of the FAISS wrapper.
            if self.vector_store is None:
                return "Error: Search index not initialized.", []

            # Retrieve the k most similar course chunks.
            similar_docs = self.vector_store.similarity_search(query, k=k)

            relevant_courses = []
            relevant_chunks = []
            seen_titles = set()  # avoid listing the same course twice

            for doc in similar_docs:
                doc_content = doc.page_content
                # Map the chunk back to its metadata via the TITLE substring.
                match = next((course for course in self.course_data
                              if course['title'] in doc_content), None)
                if match is None or match['title'] in seen_titles:
                    continue
                seen_titles.add(match['title'])
                relevant_courses.append(match)
                relevant_chunks.append(doc_content)

            if not relevant_courses:
                return "No matching courses found for your query.", []

            # Join the chunks as readable text; interpolating the list directly
            # would embed a Python list repr (with \n escapes) into the prompt.
            course_context = "\n\n".join(relevant_chunks)

            context = f"""
            Act as an experienced course advisor analyzing courses for a student interested in: "{query}"

            Based on their interest, analyze these relevant courses:
            {course_context}

            Provide a detailed analysis that includes:
            1. Query Analysis: What specific learning needs or interests are indicated by this query
            2. Course Recommendations: For each relevant course:
               - Explain why it matches the student's needs
               - Highlight key features and benefits
               - Specify who would benefit most from this course
            3. Best Match: Identify the most suitable course and explain
            4. Learning Path: Suggest how the student might progress through these courses if relevant

            Be specific in your analysis, mentioning course titles and concrete features.
            Focus on how each course addresses the student's learning objectives.
            """

            # Generate the analysis.
            response = self.generation_model.invoke(context)

            # LangChain chat models return a message object with .content;
            # fall back to str() for any unexpected return type.
            if hasattr(response, 'content'):
                parsed_response = response.content
            else:
                parsed_response = str(response)

            return parsed_response, relevant_courses
        except Exception as e:
            st.error(f"Error during course search: {str(e)}")
            return f"Error during course search: {str(e)}", []

def main():
    """Run the Streamlit course-search UI.

    Loads the course CSV, builds the search index once per browser session,
    and renders search results with an AI-generated analysis.
    """
    st.title("🎓 Analytics Vidhya Course Search Assistant")
    st.write("Find the perfect free course for your learning journey with AI-powered recommendations.")

    # Cached across sessions: model clients are expensive to construct.
    @st.cache_resource
    def initialize_search_system():
        return CourseSearchSystem()

    # Cached by input: the CSV is re-read only when the file changes.
    @st.cache_data
    def load_and_process_data():
        csv_path = r"data/detailed_courses.csv"
        try:
            df = pd.read_csv(csv_path)
            return df
        except FileNotFoundError:
            st.error(f"Could not find the file: {csv_path}")
            st.info("Please ensure the CSV file path is correct.")
            return None

    search_system = initialize_search_system()
    df = load_and_process_data()

    if df is not None:
        # Build the index once per session; session_state survives reruns
        # within a session, so reruns skip the (slow) embedding step.
        if 'index_built' not in st.session_state:
            with st.spinner("Building search index... This may take a moment."):
                search_system.create_vector_store(df)
                st.session_state.index_built = True

        with st.form(key='search_form'):
            query = st.text_input("🔍 What would you like to learn?", 
                                placeholder="Example: machine learning for beginners")
            search_button = st.form_submit_button("Search Courses", use_container_width=True)

        if search_button and not query:
            # Previously an empty-query submit did nothing, with no feedback.
            st.warning("Please enter a search query first.")
        elif query and search_button:
            with st.spinner("Analyzing courses for you..."):
                response, courses = search_system.search_courses(query)

                if courses:
                    st.write("### 📊 Course Analysis")
                    st.markdown(response)  # AI-generated analysis (markdown)

                    st.write("### 📚 Recommended Courses")
                    for course in courses:
                        with st.expander(f"📘 {course['title']}", expanded=True):
                            cols = st.columns([1, 1])
                            with cols[0]:
                                st.write(f"**Level:** {course['level']}")
                                st.write(f"**Duration:** {course['duration']}")

                            with cols[1]:
                                st.markdown(f"[**Enroll Now** 🚀]({course['url']})")

                            st.write("**Overview:**")
                            st.write(course['brief'])

                else:
                    st.warning("No courses found matching your query. Please try different search terms.")

# Entry point when executed directly (e.g. `streamlit run <this file>`).
if __name__ == "__main__":
    main()