Spaces:

navneetsatyamkumar
/

Analyticsvidhya-SmartSearch

Sleeping

App Files Files Community

navneetsatyamkumar committed on Jan 8, 2025

Commit

a9faa64

verified ·

1 Parent(s): f6475d0

Upload 6 files

Browse files

Files changed (6) hide show

app.py +80 -0
course_details.json +181 -0
course_faiss.index +0 -0
index_courses.py +39 -0
requirements.txt +90 -0
scrape_courses.py +85 -0

app.py ADDED Viewed

	@@ -0,0 +1,80 @@

+import streamlit as st
+import faiss
+import numpy as np
+from sentence_transformers import SentenceTransformer
+import json
+base_url = "https://courses.analyticsvidhya.com/"
+course_paths = [
+    "/courses/frameworks-for-effective-problem-solving",
+    "/courses/your-ultimate-guide-to-becoming-an-agentic-ai-expert-by-2025",
+    "/courses/a-comprehensive-learning-path-to-become-a-data-analyst-in-2025",
+    "/courses/reimagining-genai-common-mistakes-and-best-practices-for-success",
+    "/courses/coding-a-chatgpt-style-language-model-from-scratch-in-pytorch",
+    "/courses/mastering-multilingual-genai-open-weights-for-indic-languages",
+    "/courses/learning-autonomous-driving-behaviors-with-llms-and-rl",
+    "/courses/genai-applied-to-quantitative-finance-for-control-implementation",
+    "/courses/navigating-llm-tradeoffs-techniques-for-speed-cost-scale-and-accuracy",
+    "/courses/applied-machine-learning-beginner-to-professional",
+    "courses/ace-data-science-interviews",
+    "courses/data-science-hacks-tips-and-tricks",
+    "courses/getting-started-with-decision-trees",
+    "courses/loan-prediction-practice-problem-using-python",
+    "courses/big-mart-sales-prediction-using-r",
+    "courses/twitter-sentiment-analysis",
+    "courses/pandas-for-data-analysis-in-python",
+    "courses/support-vector-machine-svm-in-python-and-r",
+    "courses/nano-course-dreambooth-stable-diffusion-for-custom-images",
+    "courses/building-large-language-models-for-code",
+    "courses/cutting-edge-llm-tricks",
+]
+index = faiss.read_index("course_faiss.index")
+with open("course_details.json", "r") as f:
+    course_details = json.load(f)
+model = SentenceTransformer('all-MiniLM-L6-v2')
+def search_courses(query, top_k=5):
+    # Encode the query to get its embedding
+    query_embedding = model.encode([query])
+    query_embedding = np.array(query_embedding).astype("float32")
+    # Search the FAISS index for the top_k most similar courses
+    distances, indices = index.search(query_embedding, top_k)
+    results = []
+    for idx, dist in zip(indices[0], distances[0]):
+        course = course_details[idx]
+        results.append({
+            "title": course["title"],
+            "description": course["description"],
+            "curriculum": course["curriculum"],  # Include curriculum
+            "additional_info": course["additional_info"],  # Include additional info
+            "link": base_url + course_paths[idx],  # Use the base URL and course paths to generate the full link
+            "distance": dist
+        })
+    return results
+# Streamlit UI
+st.title("Smart Search for Free Courses")
+st.write("Search for free courses on Analytics Vidhya!")
+query = st.text_input("Enter your query:")
+if query:
+    results = search_courses(query)
+    for res in results:
+        st.subheader(res['title'])
+        st.write(res['description'])
+        if res['curriculum']:
+            st.write("### Curriculum")
+            for item in res['curriculum']:
+                st.write(f"- {item}")
+        if res['additional_info']:
+            st.write("### Additional Information")
+            st.write(f"**Duration:** {res['additional_info'].get('duration', 'N/A')}")
+            st.write(f"**Rating:** {res['additional_info'].get('rating', 'N/A')}")
+            st.write(f"**Difficulty:** {res['additional_info'].get('difficulty', 'N/A')}")
+        st.markdown(f"[Learn More]({res['link']})")

course_details.json ADDED Viewed

	@@ -0,0 +1,181 @@

+[
+    {
+        "title": "Frameworks for Effective Problem Solving",
+        "description": "Learn structured thinking with SMART, MECE, and Issue Trees. Define clear problem statements, solve challenges systematically, and avoid common pitfalls.",
+        "curriculum": [],
+        "additional_info": {
+            "duration": "1 Hour",
+            "rating": "4.9/5",
+            "difficulty": "Beginner"
+        }
+    },
+    {
+        "title": "Anyone can Build AI Agents - Free Course",
+        "description": "Dive into the world of AI with ease! This short and engaging course introduces the exciting possibilities of creating AI agents using no-code platforms. Whether you're new to AI or just curious, we've designed this course to be beginner-friendly.",
+        "curriculum": [],
+        "additional_info": {}
+    },
+    {
+        "title": "A Comprehensive Learning Path to Become a Data Analyst in 2025",
+        "description": "Want to become a data analyst this year, but confused about where to start and what to follow? This comprehensive learning path from Analytics Vidhya should provide you with all the answers you need!",
+        "curriculum": [],
+        "additional_info": {
+            "duration": "2 Hours",
+            "rating": "4.8/5",
+            "difficulty": "Beginner"
+        }
+    },
+    {
+        "title": "Reimagining GenAI: Common Mistakes and Best Practices for Success",
+        "description": "Discover the Secrets to Implementing Generative AI Successfully",
+        "curriculum": [],
+        "additional_info": {
+            "duration": "1 Hour",
+            "rating": "4.8/5",
+            "difficulty": "Beginner"
+        }
+    },
+    {
+        "title": "Coding a ChatGPT-style Language Model from Scratch in PyTorch",
+        "description": "Learn to build your own language model with PyTorch step-by-step.",
+        "curriculum": [],
+        "additional_info": {
+            "duration": "1 Hour",
+            "rating": "4.8/5",
+            "difficulty": "Beginner"
+        }
+    },
+    {
+        "title": "Mastering Multilingual GenAI Open-Weights for Indic Languages",
+        "description": "Unlock the power of open-weight models to build cutting-edge multilingual AI solutions.",
+        "curriculum": [],
+        "additional_info": {
+            "duration": "1 Hour",
+            "rating": "4.7/5",
+            "difficulty": "Beginner"
+        }
+    },
+    {
+        "title": "Learning Autonomous Driving Behaviors with LLMs & RL",
+        "description": "Learn to train autonomous driving agents using Reinforcement Learning (RL) and Large Language Models (LLMs). Gain practical experience designing AI systems that simulate safe, human-like driving behavior.",
+        "curriculum": [],
+        "additional_info": {
+            "duration": "1 Hour",
+            "rating": "4.7/5",
+            "difficulty": "Intermediate"
+        }
+    },
+    {
+        "title": "GenAI Applied to Quantitative Finance: For Control Implementation",
+        "description": "Embark on the journey to understand quantitative finance with GenAI. Learn to implement AI-driven control systems for trading, risk management, and predictive modeling, optimizing financial decision-making and performance.",
+        "curriculum": [],
+        "additional_info": {
+            "duration": "1 Hour",
+            "rating": "4.7/5",
+            "difficulty": "Intermediate"
+        }
+    },
+    {
+        "title": "Navigating LLM Tradeoffs: Techniques for Speed, Cost, Scale & Accuracy",
+        "description": "Master the art of optimizing LLMs with practical techniques to achieve the best balance of performance and cost.",
+        "curriculum": [],
+        "additional_info": {
+            "duration": "1 Hour",
+            "rating": "4.8/5",
+            "difficulty": "Beginner"
+        }
+    },
+    {
+        "title": "Applied Machine Learning - Beginner to Professional",
+        "description": "This course provides you all the tools and techniques you need to apply machine learning to solve business problems. We will cover the basics of machine learning, how to build machine learning models, improve and deploy your machine learning models.",
+        "curriculum": [],
+        "additional_info": {}
+    },
+    {
+        "title": "Ace Data Science Interviews",
+        "description": "A comprehensive course covering different kinds of interviews in data science industry and how to ace these interviews. This includes technical interviews on data science / machine learning, case study interviews, guesstimate based interviews.",
+        "curriculum": [],
+        "additional_info": {}
+    },
+    {
+        "title": "Data Science Hacks, Tips and Tricks",
+        "description": "Become a better data scientist with crucial data science tips, tricks, python hacks, and efficient python code. Get python efficiency tips from industry experts at your finger tips.",
+        "curriculum": [],
+        "additional_info": {}
+    },
+    {
+        "title": "Getting started with Decision Trees",
+        "description": "Unleash the power of decision tree algorithm in machine learning with our free decision tree course and training designed for beginners to learn coding in python.",
+        "curriculum": [],
+        "additional_info": {}
+    },
+    {
+        "title": "Loan Prediction Practice Problem (Using Python)",
+        "description": "This course is aimed for people getting started into Data Science and Machine Learning while working on a real life practical problem.",
+        "curriculum": [],
+        "additional_info": {
+            "rating": "4.7/5",
+            "difficulty": "Intermediate"
+        }
+    },
+    {
+        "title": "Big Mart Sales Prediction Using R",
+        "description": "This course is aimed for people getting started into Data Science and Machine Learning while solving the Big Mart Sales Prediction problem.",
+        "curriculum": [],
+        "additional_info": {
+            "rating": "4.6/5",
+            "difficulty": "Intermediate"
+        }
+    },
+    {
+        "title": "Twitter Sentiment Analysis",
+        "description": "What is sentiment analysis? Why is sentiment analysis so popular in data science? And how can you perform sentiment analysis? Find the answers to all these questions in this free course on Sentiment Analysis using Python!",
+        "curriculum": [],
+        "additional_info": {
+            "rating": "4.7/5",
+            "difficulty": "Intermediate"
+        }
+    },
+    {
+        "title": "Pandas for Data Analysis in Python",
+        "description": "Learn high-performance pandas in python tutorial, pandas library in python for data analysis in data science. Explore python libraries for data science in this exemplary free course.",
+        "curriculum": [],
+        "additional_info": {}
+    },
+    {
+        "title": "Support Vector Machine (SVM) in Python and R",
+        "description": "Upskill with Support Vector Machine (SVM) in python, learn about SVM implementation in python from scratch in this free course for data scientists to ace in their data science career.",
+        "curriculum": [],
+        "additional_info": {}
+    },
+    {
+        "title": "Nano Course: Dreambooth-Stable Diffusion for Custom Images",
+        "description": "Theory to Practice: Dive into Stable Diffusion, its history, and significance, then master the Dreambooth process. Learn how to fine-tune Dreambooth model with your custom images discussing step by step in detail.",
+        "curriculum": [],
+        "additional_info": {
+            "duration": "1 Hour",
+            "rating": "4.6/5",
+            "difficulty": "Advanced"
+        }
+    },
+    {
+        "title": "Nano Course: Building Large Language Models for Code",
+        "description": "Learn how to train Large Language Models for Code from Scratch covering each step involved in detail from training data curation to model evaluation. Deep dive into the journey of creating Starcoder, a 15B parameter code generation model.",
+        "curriculum": [],
+        "additional_info": {
+            "duration": "38 Mins",
+            "rating": "4.7",
+            "difficulty": "Intermediate"
+        }
+    },
+    {
+        "title": "Nano Course: Cutting Edge LLM Tricks",
+        "description": "Learn cutting edge LLM tricks and techniques from top research papers including DeepMind and Meta AI and apply these tricks in building your own state of the art LLMs.",
+        "curriculum": [],
+        "additional_info": {
+            "duration": "38 Mins",
+            "rating": "4.7/5",
+            "difficulty": "Advanced"
+        }
+    }
+]

course_faiss.index ADDED Viewed

Binary file (32.3 kB). View file

index_courses.py ADDED Viewed

	@@ -0,0 +1,39 @@

+import json
+import faiss
+import numpy as np
+from sentence_transformers import SentenceTransformer
+from scrape_courses import all_course_details  # Import the scraped course data
+# Embedding model used to build the index; app.py loads the same model name to encode queries.
+model = SentenceTransformer('all-MiniLM-L6-v2')
+# Function to store course details in FAISS
+def store_in_faiss(course_details):
+    titles = [course["title"] for course in course_details]
+    descriptions = [course["description"] for course in course_details]
+    # Combine titles and descriptions into one string for a more comprehensive embedding
+    combined_texts = [title + " " + description for title, description in zip(titles, descriptions)]
+    # Generate embeddings for course details
+    embeddings = model.encode(combined_texts)
+    # Convert embeddings to numpy array for FAISS
+    embeddings = np.array(embeddings).astype("float32")
+    # Initialize FAISS index
+    dimension = embeddings.shape[1]  # Get the dimensionality of the embeddings
+    index = faiss.IndexFlatL2(dimension)  # Use L2 distance for similarity
+    # Add embeddings to FAISS index
+    index.add(embeddings)
+    return index
+# Build the index from the course list produced when scrape_courses is imported.
+faiss_index = store_in_faiss(all_course_details)
+# Always write the index to disk; app.py reads it from this file name.
+faiss.write_index(faiss_index, "course_faiss.index")
+print("Indexing completed. FAISS index saved to 'course_faiss.index'.")

requirements.txt ADDED Viewed

	@@ -0,0 +1,90 @@

+aiohappyeyeballs==2.4.4
+aiohttp==3.11.11
+aiosignal==1.3.2
+altair==5.5.0
+annotated-types==0.7.0
+anyio==4.7.0
+attrs==24.3.0
+beautifulsoup4==4.12.3
+blinker==1.9.0
+bs4==0.0.2
+cachetools==5.5.0
+certifi==2024.12.14
+charset-normalizer==3.4.1
+click==8.1.8
+faiss-cpu==1.9.0.post1
+filelock==3.16.1
+frozenlist==1.5.0
+fsspec==2024.12.0
+gitdb==4.0.12
+GitPython==3.1.44
+h11==0.14.0
+httpcore==1.0.7
+httpx==0.28.1
+huggingface-hub==0.27.0
+idna==3.10
+Jinja2==3.1.5
+joblib==1.4.2
+jsonpatch==1.33
+jsonpointer==3.0.0
+jsonschema==4.23.0
+jsonschema-specifications==2024.10.1
+langchain==0.3.13
+langchain-core==0.3.28
+langchain-text-splitters==0.3.4
+langsmith==0.2.7
+markdown-it-py==3.0.0
+MarkupSafe==3.0.2
+mdurl==0.1.2
+mpmath==1.3.0
+multidict==6.1.0
+narwhals==1.20.1
+networkx==3.4.2
+numpy==2.2.1
+orjson==3.10.13
+packaging==24.2
+pandas==2.2.3
+pillow==11.1.0
+pinecone-client==5.0.1
+pinecone-plugin-inference==1.1.0
+pinecone-plugin-interface==0.0.7
+propcache==0.2.1
+protobuf==5.29.2
+pyarrow==18.1.0
+pydantic==2.10.4
+pydantic_core==2.27.2
+pydeck==0.9.1
+Pygments==2.18.0
+python-dateutil==2.9.0.post0
+pytz==2024.2
+PyYAML==6.0.2
+referencing==0.35.1
+regex==2024.11.6
+requests==2.32.3
+requests-toolbelt==1.0.0
+rich==13.9.4
+rpds-py==0.22.3
+safetensors==0.5.0
+scikit-learn==1.6.0
+scipy==1.14.1
+sentence-transformers==3.3.1
+setuptools==75.6.0
+six==1.17.0
+smmap==5.0.2
+sniffio==1.3.1
+soupsieve==2.6
+SQLAlchemy==2.0.36
+streamlit==1.41.1
+sympy==1.13.1
+tenacity==9.0.0
+threadpoolctl==3.5.0
+tokenizers==0.21.0
+toml==0.10.2
+torch==2.5.1
+tornado==6.4.2
+tqdm==4.67.1
+transformers==4.47.1
+typing_extensions==4.12.2
+tzdata==2024.2
+urllib3==2.3.0
+yarl==1.18.3

scrape_courses.py ADDED Viewed

	@@ -0,0 +1,85 @@

+import requests
+from bs4 import BeautifulSoup
+base_url = "https://courses.analyticsvidhya.com/"
+course_paths = [
+    "/courses/frameworks-for-effective-problem-solving",
+    "/courses/your-ultimate-guide-to-becoming-an-agentic-ai-expert-by-2025",
+    "/courses/a-comprehensive-learning-path-to-become-a-data-analyst-in-2025",
+    "/courses/reimagining-genai-common-mistakes-and-best-practices-for-success",
+    "/courses/coding-a-chatgpt-style-language-model-from-scratch-in-pytorch",
+    "/courses/mastering-multilingual-genai-open-weights-for-indic-languages",
+    "/courses/learning-autonomous-driving-behaviors-with-llms-and-rl",
+    "/courses/genai-applied-to-quantitative-finance-for-control-implementation",
+    "/courses/navigating-llm-tradeoffs-techniques-for-speed-cost-scale-and-accuracy",
+    "/courses/applied-machine-learning-beginner-to-professional",
+    "courses/ace-data-science-interviews",
+    "courses/data-science-hacks-tips-and-tricks",
+    "courses/getting-started-with-decision-trees",
+    "courses/loan-prediction-practice-problem-using-python",
+    "courses/big-mart-sales-prediction-using-r",
+    "courses/twitter-sentiment-analysis",
+    "courses/pandas-for-data-analysis-in-python",
+    "courses/support-vector-machine-svm-in-python-and-r",
+    "courses/nano-course-dreambooth-stable-diffusion-for-custom-images",
+    "courses/building-large-language-models-for-code",
+    "courses/cutting-edge-llm-tricks",
+]
+def scrape_course_details(course_path):
+    url = base_url + course_path
+    response = requests.get(url)
+    if response.status_code != 200:
+        print(f"Failed to fetch {url}")
+        return None
+    soup = BeautifulSoup(response.text, 'html.parser')
+    # Extract title
+    title = soup.find("h1").text.strip() if soup.find("h1") else "No title found"
+    # Extract description
+    description = soup.find("meta", {"name": "description"})["content"].strip() if soup.find("meta", {"name": "description"}) else "No description found"
+    # Extract curriculum
+    curriculum_header = soup.find("h3", class_="section__heading", string="Course curriculum")
+    curriculum = []
+    if curriculum_header:
+        # Get the list of curriculum items
+        curriculum_list = curriculum_header.find_next("ul", class_="text-icon__list section__body")
+        if curriculum_list:
+            curriculum = [item.get_text(strip=True) for item in curriculum_list.find_all("h4")]
+    # Extract additional course information (duration, rating, difficulty)
+    additional_info = {}
+    info_list = soup.select(".text-icon__list-item")
+    for item in info_list:
+        icon = item.find("i")
+        if icon:
+            if "fa-clock-o" in icon.get("class", []):
+                additional_info["duration"] = item.find("h4").text.strip() if item.find("h4") else "No duration"
+            elif "fa-star" in icon.get("class", []):
+                additional_info["rating"] = item.find("h4").text.strip() if item.find("h4") else "No rating"
+            elif "fa-signal" in icon.get("class", []):
+                additional_info["difficulty"] = item.find("h4").text.strip() if item.find("h4") else "No difficulty level"
+    return {
+        "title": title,
+        "description": description,
+        "curriculum": curriculum,
+        "additional_info": additional_info
+    }
+all_course_details = []
+for path in course_paths:
+    details = scrape_course_details(path)
+    if details:
+        all_course_details.append(details)
+import json
+with open("course_details.json", "w") as f:
+    json.dump(all_course_details, f, indent=4)
+print("Scraping completed. Details saved to 'course_details.json'.")