Spaces:

AmritSbisht
/

Analytics_Vidhya_Search_bot

Sleeping

App Files Files Community

AmritSbisht committed on Mar 23, 2025

Commit

3056267

verified ·

1 Parent(s): cd14657

Upload app.py

Browse files

Files changed (1) hide show

app.py +199 -0

app.py ADDED Viewed

	@@ -0,0 +1,199 @@

+import streamlit as st
+import pandas as pd
+import os
+from dotenv import load_dotenv
+from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
+from langchain_community.vectorstores import FAISS
+# Load environment variables
+# load_dotenv() runs before the os.getenv() lookups below and before the
+# GOOGLE_API_KEY lookup in CourseSearchSystem.__init__.
+load_dotenv()
+# LangSmith settings copied from the environment into module-level names.
+# Each is None when the corresponding variable is not set.
+# NOTE(review): nothing in the visible code references these four names;
+# confirm whether they are still needed.
+LANGSMITH_TRACING = os.getenv("LANGSMITH_TRACING")
+LANGSMITH_API_KEY = os.getenv("LANGSMITH_API_KEY")
+LANGSMITH_ENDPOINT = os.getenv("LANGSMITH_ENDPOINT")
+LANGSMITH_PROJECT = os.getenv("LANGSMITH_PROJECT")
+class CourseSearchSystem:
+    def __init__(self):
+        """
+        Initialize the course search system with Google's Generative AI
+        """
+        # Initialize the generative model for response generation
+        self.generation_model = ChatGoogleGenerativeAI(
+            model="gemini-1.5-pro",
+            convert_system_message_to_human=True, # Use the Gemini Pro model
+            google_api_key=os.getenv('GOOGLE_API_KEY'),
+            temperature=0.1,     # Lower temperature for more consistent outputs
+            top_p=0.8,           # Reasonable top_p value for focused sampling
+            top_k=40,            # Standard top_k value
+            max_output_tokens=2048  # Ensure sufficient length for detailed analysis
+        )
+        # Initialize the embedding model for RAG
+        self.embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
+        self.vector_store = None
+        self.course_data = []
+    def process_course(self, row):
+        """
+        Process a single course row into a formatted string
+        """
+        return f"""
+        TITLE: {row['Title']}
+        BRIEF: {row['Brief']}
+        LEVEL: {row['Level']}
+        DURATION: {row['Duration']}
+        DESCRIPTION: {row['Description']}
+        URL: {row['Link']}
+        CURRICULUM: {row['Curriculum']}
+        TARGET AUDIENCE AND BENEFITS: {row['What should enroll & takeaway']}
+        """
+    def create_vector_store(self, df):
+        """
+        Create vector store from course data
+        """
+        try:
+            texts = []
+            for _, row in df.iterrows():
+                doc = self.process_course(row)
+                texts.append(doc)
+                self.course_data.append({
+                    'title': row['Title'],
+                    'brief': row['Brief'],
+                    'level': row['Level'],
+                    'duration': row['Duration'],
+                    'url': row['Link'],
+                    'curriculum': row['Curriculum'],
+                    'target_audience': row['What should enroll & takeaway']
+                })
+            # Create the vector store using the embedding model
+            self.vector_store = FAISS.from_texts(texts, self.embeddings)
+        except Exception as e:
+            st.error(f"Error creating vector store: {str(e)}")
+            raise
+    def search_courses(self, query, k=3):
+        """
+        Search for relevant courses based on query
+        """
+        try:
+            if not self.vector_store:
+                return "Error: Search index not initialized.", []
+            # Perform similarity search using the vector store
+            similar_docs = self.vector_store.similarity_search(query, k=k)
+            relevant_courses = []
+            relevant_chunks = []
+            for doc in similar_docs:
+                doc_content = doc.page_content
+                try:
+                    idx = next(i for i, course in enumerate(self.course_data)
+                             if course['title'] in doc_content)
+                    relevant_courses.append(self.course_data[idx])
+                    relevant_chunks.append(doc_content)
+                except StopIteration:
+                    continue
+            if not relevant_courses:
+                return "No matching courses found for your query.", []
+            # Generate analysis using the generative model
+            context = f"""
+            Act as an experienced course advisor analyzing courses for a student interested in: "{query}"
+            Based on their interest, analyze these relevant courses:
+            {relevant_chunks}
+            Provide a detailed analysis that includes:
+            1. Query Analysis: What specific learning needs or interests are indicated by this query
+            2. Course Recommendations: For each relevant course:
+               - Explain why it matches the student's needs
+               - Highlight key features and benefits
+               - Specify who would benefit most from this course
+            3. Best Match: Identify the most suitable course and explain
+            4. Learning Path: Suggest how the student might progress through these courses if relevant
+            Be specific in your analysis, mentioning course titles and concrete features.
+            Focus on how each course addresses the student's learning objectives.
+            """
+            # Use .invoke() to generate a response
+            response = self.generation_model.invoke(context)
+            # Extract the content from the response
+            if hasattr(response, 'content'):
+                parsed_response = response.content
+            else:
+                parsed_response = str(response)  # Fallback in case of unexpected structure
+            return parsed_response, relevant_courses
+        except Exception as e:
+            st.error(f"Error during course search: {str(e)}")
+            return f"Error during course search: {str(e)}", []
+def main():
+    """
+    Main function to run the Streamlit application
+    """
+    st.title("🎓 Analytics Vidhya Course Search Assistant")
+    st.write("Find the perfect free course for your learning journey with AI-powered recommendations.")
+    @st.cache_resource
+    def initialize_search_system():
+        return CourseSearchSystem()
+    @st.cache_data
+    def load_and_process_data():
+        csv_path = r"data/detailed_courses.csv"
+        try:
+            df = pd.read_csv(csv_path)
+            return df
+        except FileNotFoundError:
+            st.error(f"Could not find the file: {csv_path}")
+            st.info("Please ensure the CSV file path is correct.")
+            return None
+    search_system = initialize_search_system()
+    df = load_and_process_data()
+    if df is not None:
+        if 'index_built' not in st.session_state:
+            with st.spinner("Building search index... This may take a moment."):
+                search_system.create_vector_store(df)
+                st.session_state.index_built = True
+        with st.form(key='search_form'):
+            query = st.text_input("🔍 What would you like to learn?",
+                                placeholder="Example: machine learning for beginners")
+            search_button = st.form_submit_button("Search Courses", use_container_width=True)
+        if query and search_button:
+            with st.spinner("Analyzing courses for you..."):
+                response, courses = search_system.search_courses(query)
+                if courses:
+                    st.write("### 📊 Course Analysis")
+                    st.markdown(response)  # Display the parsed response
+                    st.write("### 📚 Recommended Courses")
+                    for course in courses:
+                        with st.expander(f"📘 {course['title']}", expanded=True):
+                            cols = st.columns([1, 1])
+                            with cols[0]:
+                                st.write(f"**Level:** {course['level']}")
+                                st.write(f"**Duration:** {course['duration']}")
+                            with cols[1]:
+                                st.markdown(f"[**Enroll Now** 🚀]({course['url']})")
+                            st.write("**Overview:**")
+                            st.write(course['brief'])
+                else:
+                    st.warning("No courses found matching your query. Please try different search terms.")
+# Run the app only when this file is executed as a script, not when imported.
+if __name__ == "__main__":
+    main()