# app.py — Analytics Vidhya Course Search Assistant (Streamlit + Gemini RAG)
import streamlit as st
import pandas as pd
import os
from dotenv import load_dotenv
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langchain_community.vectorstores import FAISS
# Load environment variables from a local .env file (API keys, LangSmith config).
load_dotenv()
# LangSmith tracing settings read from the environment.
# NOTE(review): these module-level variables are never referenced below, and
# os.getenv does not export anything — presumably LangChain reads the
# LANGSMITH_* variables from the environment itself; confirm whether these
# assignments are needed at all.
LANGSMITH_TRACING = os.getenv("LANGSMITH_TRACING")
LANGSMITH_API_KEY = os.getenv("LANGSMITH_API_KEY")
LANGSMITH_ENDPOINT = os.getenv("LANGSMITH_ENDPOINT")
LANGSMITH_PROJECT = os.getenv("LANGSMITH_PROJECT")
class CourseSearchSystem:
    """RAG-based course search: FAISS similarity retrieval over course
    documents plus a Gemini model that writes the recommendation analysis.

    Requires GOOGLE_API_KEY in the environment for both models.
    """

    def __init__(self):
        """
        Initialize the course search system with Google's Generative AI.
        """
        # Chat model that generates the course analysis shown to the user.
        self.generation_model = ChatGoogleGenerativeAI(
            model="gemini-1.5-pro",
            convert_system_message_to_human=True,  # Gemini lacks a system role
            google_api_key=os.getenv('GOOGLE_API_KEY'),
            temperature=0.1,         # low temperature for consistent outputs
            top_p=0.8,               # focused nucleus sampling
            top_k=40,                # standard top_k value
            max_output_tokens=2048   # room for a detailed analysis
        )
        # Embedding model used to index the course documents for retrieval.
        self.embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
        self.vector_store = None  # FAISS index, built by create_vector_store()
        self.course_data = []     # structured records parallel to the index

    def process_course(self, row):
        """
        Format one course row (mapping with Title/Brief/Level/... keys) into
        the plain-text document that gets embedded and retrieved.
        """
        return f"""
TITLE: {row['Title']}
BRIEF: {row['Brief']}
LEVEL: {row['Level']}
DURATION: {row['Duration']}
DESCRIPTION: {row['Description']}
URL: {row['Link']}
CURRICULUM: {row['Curriculum']}
TARGET AUDIENCE AND BENEFITS: {row['What should enroll & takeaway']}
"""

    def create_vector_store(self, df):
        """
        Build the FAISS index and the parallel course_data list from a
        DataFrame of courses.

        Safe to call repeatedly: previous state is discarded first. (The
        original appended to course_data, so rebuilding the index — e.g. a
        new session hitting a cached, shared instance — duplicated every
        course record.)
        """
        try:
            # Reset state so a rebuild does not accumulate duplicates.
            self.course_data = []
            texts = []
            for _, row in df.iterrows():
                texts.append(self.process_course(row))
                self.course_data.append({
                    'title': row['Title'],
                    'brief': row['Brief'],
                    'level': row['Level'],
                    'duration': row['Duration'],
                    'url': row['Link'],
                    'curriculum': row['Curriculum'],
                    'target_audience': row['What should enroll & takeaway'],
                })
            # Embed all documents and build the similarity index.
            self.vector_store = FAISS.from_texts(texts, self.embeddings)
        except Exception as e:
            st.error(f"Error creating vector store: {str(e)}")
            raise

    def search_courses(self, query, k=3):
        """
        Retrieve the k most similar courses to `query` and ask the LLM for a
        structured recommendation.

        Returns (analysis_text, matching_course_records). On failure returns
        an error message and an empty list instead of raising.
        """
        try:
            if not self.vector_store:
                return "Error: Search index not initialized.", []
            # Vector similarity search over the embedded course documents.
            similar_docs = self.vector_store.similarity_search(query, k=k)
            relevant_courses = []
            relevant_chunks = []
            for doc in similar_docs:
                doc_content = doc.page_content
                # Map the retrieved text back to its structured record via its
                # exact TITLE line. (Bare substring matching on the title alone
                # could hit a different course whose text mentions this title.)
                for course in self.course_data:
                    if f"TITLE: {course['title']}" in doc_content:
                        relevant_courses.append(course)
                        relevant_chunks.append(doc_content)
                        break
            if not relevant_courses:
                return "No matching courses found for your query.", []
            # Join the retrieved documents as readable text rather than
            # interpolating the raw Python list repr into the prompt.
            course_context = "\n\n".join(relevant_chunks)
            context = f"""
Act as an experienced course advisor analyzing courses for a student interested in: "{query}"
Based on their interest, analyze these relevant courses:
{course_context}
Provide a detailed analysis that includes:
1. Query Analysis: What specific learning needs or interests are indicated by this query
2. Course Recommendations: For each relevant course:
- Explain why it matches the student's needs
- Highlight key features and benefits
- Specify who would benefit most from this course
3. Best Match: Identify the most suitable course and explain
4. Learning Path: Suggest how the student might progress through these courses if relevant
Be specific in your analysis, mentioning course titles and concrete features.
Focus on how each course addresses the student's learning objectives.
"""
            # Generate the analysis; LangChain chat models return a message
            # object whose text lives in .content.
            response = self.generation_model.invoke(context)
            if hasattr(response, 'content'):
                parsed_response = response.content
            else:
                parsed_response = str(response)  # fallback for unexpected shape
            return parsed_response, relevant_courses
        except Exception as e:
            st.error(f"Error during course search: {str(e)}")
            return f"Error during course search: {str(e)}", []
def main():
    """Streamlit entry point: load the catalogue, build the index, serve searches."""

    @st.cache_resource
    def get_search_system():
        # One CourseSearchSystem shared across reruns of this server process.
        return CourseSearchSystem()

    @st.cache_data
    def load_courses():
        # Course catalogue CSV shipped alongside the app.
        csv_path = r"data/detailed_courses.csv"
        try:
            return pd.read_csv(csv_path)
        except FileNotFoundError:
            st.error(f"Could not find the file: {csv_path}")
            st.info("Please ensure the CSV file path is correct.")
            return None

    st.title("🎓 Analytics Vidhya Course Search Assistant")
    st.write("Find the perfect free course for your learning journey with AI-powered recommendations.")

    search_system = get_search_system()
    df = load_courses()
    if df is None:
        return

    # Build the FAISS index once per session.
    if 'index_built' not in st.session_state:
        with st.spinner("Building search index... This may take a moment."):
            search_system.create_vector_store(df)
            st.session_state.index_built = True

    with st.form(key='search_form'):
        query = st.text_input("🔍 What would you like to learn?",
                              placeholder="Example: machine learning for beginners")
        search_button = st.form_submit_button("Search Courses", use_container_width=True)

    if not (query and search_button):
        return

    with st.spinner("Analyzing courses for you..."):
        response, courses = search_system.search_courses(query)

    if not courses:
        st.warning("No courses found matching your query. Please try different search terms.")
        return

    st.write("### 📊 Course Analysis")
    st.markdown(response)  # LLM-generated analysis
    st.write("### 📚 Recommended Courses")
    for course in courses:
        with st.expander(f"📘 {course['title']}", expanded=True):
            left, right = st.columns([1, 1])
            with left:
                st.write(f"**Level:** {course['level']}")
                st.write(f"**Duration:** {course['duration']}")
            with right:
                st.markdown(f"[**Enroll Now** 🚀]({course['url']})")
            st.write("**Overview:**")
            st.write(course['brief'])


if __name__ == "__main__":
    main()