AmritSbisht committed on
Commit
3056267
·
verified ·
1 Parent(s): cd14657

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +199 -0
app.py ADDED
@@ -0,0 +1,199 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import os
4
+ from dotenv import load_dotenv
5
+ from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
6
+ from langchain_community.vectorstores import FAISS
7
+
8
# Load environment variables before any of them are read below.
load_dotenv()

# LangSmith settings taken from the environment; os.getenv yields None for
# any variable that is not set.
(
    LANGSMITH_TRACING,
    LANGSMITH_API_KEY,
    LANGSMITH_ENDPOINT,
    LANGSMITH_PROJECT,
) = (
    os.getenv(variable)
    for variable in (
        "LANGSMITH_TRACING",
        "LANGSMITH_API_KEY",
        "LANGSMITH_ENDPOINT",
        "LANGSMITH_PROJECT",
    )
)
15
class CourseSearchSystem:
    """Retrieval-augmented course recommender built on Google Generative AI.

    Course rows are rendered to text and embedded into a FAISS vector store.
    A query retrieves the closest course texts, and a Gemini chat model is
    asked to write an advisory analysis of those courses.

    Attributes:
        generation_model: Chat model used to write the analysis.
        embeddings: Embedding model used to build and query the index.
        vector_store: FAISS index, or None until create_vector_store() runs.
        course_data: One dict per indexed course (see _RECORD_COLUMNS for
            the keys), in the same order as the indexed texts.
    """

    # Maps each key of a ``course_data`` record to the DataFrame column it is
    # copied from.
    _RECORD_COLUMNS = {
        'title': 'Title',
        'brief': 'Brief',
        'level': 'Level',
        'duration': 'Duration',
        'url': 'Link',
        'curriculum': 'Curriculum',
        'target_audience': 'What should enroll & takeaway',
    }

    def __init__(self):
        """
        Initialize the course search system with Google's Generative AI
        """
        # Chat model used for response generation.
        self.generation_model = ChatGoogleGenerativeAI(
            model="gemini-1.5-pro",
            convert_system_message_to_human=True,
            google_api_key=os.getenv('GOOGLE_API_KEY'),
            temperature=0.1,  # Lower temperature for more consistent outputs
            top_p=0.8,  # Reasonable top_p value for focused sampling
            top_k=40,  # Standard top_k value
            max_output_tokens=2048,  # Ensure sufficient length for detailed analysis
        )

        # Embedding model for retrieval; the index is built later.
        self.embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
        self.vector_store = None
        self.course_data = []

    def process_course(self, row):
        """
        Process a single course row into a formatted string

        Args:
            row: Mapping-like course row (e.g. a pandas Series) that provides
                the Title, Brief, Level, Duration, Description, Link,
                Curriculum and 'What should enroll & takeaway' fields.

        Returns:
            A newline-delimited "LABEL: value" block, one field per line.
        """
        lines = [
            "",
            f"TITLE: {row['Title']}",
            f"BRIEF: {row['Brief']}",
            f"LEVEL: {row['Level']}",
            f"DURATION: {row['Duration']}",
            f"DESCRIPTION: {row['Description']}",
            f"URL: {row['Link']}",
            f"CURRICULUM: {row['Curriculum']}",
            f"TARGET AUDIENCE AND BENEFITS: {row['What should enroll & takeaway']}",
            "",
        ]
        return "\n".join(lines)

    def _course_record(self, row):
        """Return the ``course_data`` dict for one course row."""
        return {field: row[column] for field, column in self._RECORD_COLUMNS.items()}

    def create_vector_store(self, df):
        """
        Create vector store from course data

        Replaces any previously built index and course records, so calling
        this more than once does not accumulate duplicate entries.

        Args:
            df: DataFrame with one course per row and the columns read by
                process_course().

        Raises:
            Exception: Whatever the row processing or index construction
                raised; it is shown via st.error and then re-raised.
        """
        try:
            texts = []
            records = []
            for _, row in df.iterrows():
                texts.append(self.process_course(row))
                records.append(self._course_record(row))

            # Tag every text with its position so a search hit can be mapped
            # back to its record exactly, instead of by title substring.
            metadatas = [{'course_index': position} for position in range(len(records))]

            self.vector_store = FAISS.from_texts(texts, self.embeddings, metadatas=metadatas)
            # Publish the records only once the index exists, so the two
            # always describe the same set of courses.
            self.course_data = records
        except Exception as e:
            st.error(f"Error creating vector store: {str(e)}")
            raise

    def _course_for_document(self, doc):
        """Return the course record a retrieved document was built from.

        Uses the ``course_index`` metadata written by create_vector_store().
        For documents without that metadata, falls back to matching the
        document's exact ``TITLE:`` line. Returns None when nothing matches.
        """
        metadata = getattr(doc, 'metadata', None) or {}
        index = metadata.get('course_index')
        if isinstance(index, int) and 0 <= index < len(self.course_data):
            return self.course_data[index]

        # Whole-line match, so one title being a prefix or substring of
        # another (or appearing in a description) cannot select the wrong
        # course.
        content = doc.page_content
        for course in self.course_data:
            if f"TITLE: {course['title']}\n" in content:
                return course
        return None

    @staticmethod
    def _build_prompt(query, chunks):
        """Build the advisor prompt for ``query`` over the retrieved texts."""
        # Join the chunks as plain text; interpolating the list directly
        # would embed its Python repr (brackets, quotes, escaped newlines).
        courses_text = "\n---\n".join(chunks)
        lines = [
            "",
            f'Act as an experienced course advisor analyzing courses for a student interested in: "{query}"',
            "",
            "Based on their interest, analyze these relevant courses:",
            courses_text,
            "",
            "Provide a detailed analysis that includes:",
            "1. Query Analysis: What specific learning needs or interests are indicated by this query",
            "2. Course Recommendations: For each relevant course:",
            "- Explain why it matches the student's needs",
            "- Highlight key features and benefits",
            "- Specify who would benefit most from this course",
            "3. Best Match: Identify the most suitable course and explain",
            "4. Learning Path: Suggest how the student might progress through these courses if relevant",
            "",
            "Be specific in your analysis, mentioning course titles and concrete features.",
            "Focus on how each course addresses the student's learning objectives.",
            "",
        ]
        return "\n".join(lines)

    def search_courses(self, query, k=3):
        """
        Search for relevant courses based on query

        Args:
            query: Free-text description of what the user wants to learn.
            k: Number of nearest course texts to retrieve.

        Returns:
            A ``(message, courses)`` tuple. On success ``message`` is the
            generated analysis and ``courses`` the matching records. When the
            index is missing, nothing matches, or an error occurs,
            ``message`` explains why and ``courses`` is an empty list.
        """
        try:
            if self.vector_store is None:
                return "Error: Search index not initialized.", []

            # Perform similarity search using the vector store
            similar_docs = self.vector_store.similarity_search(query, k=k)

            relevant_courses = []
            relevant_chunks = []
            for doc in similar_docs:
                course = self._course_for_document(doc)
                if course is None:
                    continue
                relevant_courses.append(course)
                relevant_chunks.append(doc.page_content)

            if not relevant_courses:
                return "No matching courses found for your query.", []

            # Generate analysis using the generative model
            response = self.generation_model.invoke(
                self._build_prompt(query, relevant_chunks)
            )

            # Fall back to the string form if the response has no .content
            if hasattr(response, 'content'):
                parsed_response = response.content
            else:
                parsed_response = str(response)

            return parsed_response, relevant_courses
        except Exception as e:
            # UI boundary: report the failure and keep the tuple contract so
            # the caller can render a message instead of crashing.
            st.error(f"Error during course search: {str(e)}")
            return f"Error during course search: {str(e)}", []
136
+
137
def main():
    """
    Main function to run the Streamlit application

    Loads the course CSV, makes sure the shared search index exists, renders
    the search form and, on submit, shows the generated analysis followed by
    an expandable card for each recommended course.
    """
    st.title("🎓 Analytics Vidhya Course Search Assistant")
    st.write("Find the perfect free course for your learning journey with AI-powered recommendations.")

    @st.cache_resource
    def initialize_search_system():
        # Cached as a resource so reruns reuse one CourseSearchSystem.
        return CourseSearchSystem()

    @st.cache_data
    def load_and_process_data():
        csv_path = r"data/detailed_courses.csv"
        try:
            return pd.read_csv(csv_path)
        except FileNotFoundError:
            st.error(f"Could not find the file: {csv_path}")
            st.info("Please ensure the CSV file path is correct.")
            return None

    search_system = initialize_search_system()
    df = load_and_process_data()

    # Without course data there is nothing to index or search.
    if df is None:
        return

    # The search system is a cached resource, so the "is the index built?"
    # check must look at that object. A per-session flag would rebuild (and
    # re-embed) the whole catalogue on the same instance for every new
    # browser session.
    if search_system.vector_store is None:
        with st.spinner("Building search index... This may take a moment."):
            search_system.create_vector_store(df)
    # Kept for backward compatibility with code that reads this flag.
    st.session_state.index_built = True

    with st.form(key='search_form'):
        query = st.text_input("🔍 What would you like to learn?",
                              placeholder="Example: machine learning for beginners")
        search_button = st.form_submit_button("Search Courses", use_container_width=True)

    # Ignore whitespace-only input rather than sending a blank query.
    query = query.strip() if query else query

    if not (query and search_button):
        return

    with st.spinner("Analyzing courses for you..."):
        response, courses = search_system.search_courses(query)

    if not courses:
        st.warning("No courses found matching your query. Please try different search terms.")
        return

    st.write("### 📊 Course Analysis")
    st.markdown(response)  # Display the parsed response

    st.write("### 📚 Recommended Courses")
    for course in courses:
        with st.expander(f"📘 {course['title']}", expanded=True):
            cols = st.columns([1, 1])
            with cols[0]:
                st.write(f"**Level:** {course['level']}")
                st.write(f"**Duration:** {course['duration']}")

            with cols[1]:
                st.markdown(f"[**Enroll Now** 🚀]({course['url']})")

            st.write("**Overview:**")
            st.write(course['brief'])
198
# Run the app only when this file is executed as the entry script.
if __name__ == "__main__":
    main()