File size: 5,564 Bytes
41524d3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
import json
import numpy as np
from sentence_transformers import SentenceTransformer, util
import gradio as gr
import requests
from bs4 import BeautifulSoup
import time
import torch
import traceback

# Built-in demo catalogue used in place of live provider data.
# Business-analysis courses come first, followed by the general programming
# and machine-learning entries. Every record is a dict with the same four
# string keys, in this order: "title", "description", "link", "source".
_COURSE_FIELDS = ("title", "description", "link", "source")

_COURSE_ROWS = (
    (
        "Business Analysis and Process Management Specialization",
        "Learn core business analysis skills, process mapping, and improvement techniques for organizational efficiency.",
        "https://www.coursera.org/specializations/business-analysis-process-management",
        "coursera",
    ),
    (
        "Business Analytics Fundamentals",
        "Master data-driven decision making, Excel, SQL, and visualization tools for business analysis.",
        "https://www.udemy.com/course/business-analytics-fundamentals",
        "udemy",
    ),
    (
        "Agile Business Analysis Professional",
        "Learn agile methodologies, user stories, and modern BA practices for software projects.",
        "https://www.coursera.org/professional-certificates/agile-business-analysis",
        "coursera",
    ),
    (
        "Python Programming for Beginners",
        "Learn Python from scratch. Covers basic concepts, data structures, and programming fundamentals.",
        "https://www.udemy.com/course/python-for-beginners",
        "udemy",
    ),
    (
        "Machine Learning Specialization",
        "Comprehensive machine learning course covering supervised learning, neural networks, and practical ML projects.",
        "https://www.coursera.org/specializations/machine-learning",
        "coursera",
    ),
)

# Expand each row into the dict shape the search functions index by key.
SAMPLE_COURSES = [dict(zip(_COURSE_FIELDS, row)) for row in _COURSE_ROWS]

def scrape_courses(query, courses=None):
    """
    Return the courses whose title or description mentions any query term.

    Despite the name, no network request is made: the function filters an
    in-memory catalogue. The query is lower-cased and split on whitespace,
    and a course matches when at least one term occurs as a substring of its
    lower-cased title or description.

    Args:
        query: Free-text search string.
        courses: Optional iterable of course dicts (each with "title" and
            "description" keys) to search. Defaults to SAMPLE_COURSES.

    Returns:
        A new list of the matching course dicts, in catalogue order. An
        empty list is returned when nothing matches, when the query has no
        terms, or when the query/catalogue is malformed.
    """
    # TODO: replace the local filter with real provider API calls. The draft
    # pointed at https://www.udemy.com/api-2.0/courses/ and
    # https://api.coursera.org/api/courses.v1 (both presumably need
    # registered API access - confirm); the query must be URL-encoded
    # before it is placed in either URL.
    try:
        catalog = SAMPLE_COURSES if courses is None else courses
        query_terms = query.lower().split()

        matches = []
        for course in catalog:
            # Lower-case once per course rather than once per term.
            title = course["title"].lower()
            description = course["description"].lower()
            if any(term in title or term in description for term in query_terms):
                matches.append(course)
        return matches
    except (AttributeError, KeyError, TypeError) as e:
        # Non-string query or a record missing a field: keep the original
        # best-effort contract of reporting the problem and returning nothing.
        print(f"Error scraping courses: {e}")
        return []

# Loaded models keyed by name, so the weights are read once per process
# instead of once per search request.
_MODEL_CACHE = {}


def _get_embedding_model(model_name="all-MiniLM-L6-v2"):
    """Return a CPU-pinned SentenceTransformer, loading it on first use only."""
    model = _MODEL_CACHE.get(model_name)
    if model is None:
        model = SentenceTransformer(model_name, device="cpu")
        _MODEL_CACHE[model_name] = model
    return model


def search_courses(query, top_k=5):
    """
    Rank the courses matching a query by embedding similarity.

    Candidate courses come from scrape_courses(); each candidate's
    "title description" text and the prompt "course about <query>" are
    embedded, and candidates are ordered by cosine similarity to the prompt.

    Args:
        query: Free-text search string.
        top_k: Maximum number of results to return (default 5).

    Returns:
        - A user-facing message (str) when the query is empty/blank or no
          candidate course is found.
        - Otherwise a list of dicts with keys "Title", "Description",
          "Link", "Source" and "Relevance" (similarity formatted as a
          percentage), best match first.
        - An empty list when an unexpected error occurs; the error and its
          traceback are printed.
    """
    # Guard against None as well as blank strings before calling .strip().
    if not query or not query.strip():
        return "Please enter a search query."

    try:
        # Get relevant courses based on the query
        relevant_courses = scrape_courses(query)

        if not relevant_courses:
            return "No courses found for your search query."

        limit = max(0, min(top_k, len(relevant_courses)))
        if limit == 0:
            return []

        model = _get_embedding_model()

        # Embed every candidate and the query prompt.
        course_texts = [
            f"{course['title']} {course['description']}"
            for course in relevant_courses
        ]
        course_embeddings = model.encode(course_texts, convert_to_tensor=True)
        query_embedding = model.encode(f"course about {query}", convert_to_tensor=True)

        # Row 0 holds the query's similarity to each candidate.
        similarities = util.pytorch_cos_sim(query_embedding, course_embeddings)[0]

        # topk returns values in descending order; .tolist() yields plain
        # Python floats/ints regardless of which device the tensor is on.
        scores, indices = torch.topk(similarities, k=limit)

        results = []
        for similarity_score, idx in zip(scores.tolist(), indices.tolist()):
            course = relevant_courses[idx]
            results.append({
                "Title": course["title"],
                "Description": course["description"],
                "Link": course["link"],
                "Source": course["source"],
                "Relevance": f"{similarity_score:.2%}"
            })

        return results
    except Exception as e:
        # UI boundary: log the failure and let the caller show a
        # "no results" message instead of crashing.
        print(f"Search error: {str(e)}")
        traceback.print_exc()
        return []

def search_interface(query):
    """
    Gradio callback: run a course search and render the outcome as Markdown.

    Logs the query to stdout, delegates to search_courses(), and then:
      * passes a string result (a user-facing message) straight through;
      * returns a "no matching courses" message for an empty result;
      * otherwise renders each result dict as a Markdown section, with
        sections separated by blank lines.

    Any exception is printed with its traceback and reported to the user as
    an "An error occurred: ..." string rather than being raised.
    """

    def render(entry):
        # One Markdown section per course; the fields are separated by blank
        # lines so each one renders as its own paragraph.
        return (
            f"**Title**: {entry['Title']}\n\n"
            f"**Description**: {entry['Description']}\n\n"
            f"**Source:** {entry['Source']}\n\n"
            f"**Relevance:** {entry['Relevance']}\n\n"
            f"[Go to course]({entry['Link']})"
        )

    try:
        print(f"\nSearching for: {query}")
        outcome = search_courses(query)

        # search_courses signals "nothing to rank" with a plain message.
        if isinstance(outcome, str):
            return outcome
        if not outcome:
            return "No matching courses found. Please try a different search term."

        sections = [render(entry) for entry in outcome]
        return "\n\n".join(sections)
    except Exception as err:
        traceback.print_exc()
        return f"An error occurred: {str(err)}"

# Assemble the Gradio UI: one text box in, Markdown out, with
# search_interface as the callback and a few example queries.
_INTERFACE_OPTIONS = {
    "fn": search_interface,
    "inputs": "text",
    "outputs": "markdown",
    "title": "Free Course Search Engine",
    "description": "Enter a topic or keywords to find relevant free courses from Udemy and Coursera.",
    "examples": ["Python", "Business Analyst", "Data Science", "Web Development"],
}
iface = gr.Interface(**_INTERFACE_OPTIONS)

# Start the app as soon as this module is executed; share=True is passed
# through to Gradio's launch() to request a shareable link.
iface.launch(share=True)