# NOTE: this file was copied from a web file viewer; the viewer's header
# (status "Sleeping", file size 5,564 bytes, revision 41524d3) and its
# line-number gutter (1-146) are not part of the program.
import json
import numpy as np
from sentence_transformers import SentenceTransformer, util
import gradio as gr
import requests
from bs4 import BeautifulSoup
import time
import torch
import traceback
# Built-in demo catalogue used in place of live API results.
# Each row is (title, description, link, source); the comprehension turns the
# rows into dicts with exactly those four keys, in that order.
SAMPLE_COURSES = [
    {"title": title, "description": description, "link": link, "source": source}
    for title, description, link, source in (
        # Business-analysis courses.
        (
            "Business Analysis and Process Management Specialization",
            "Learn core business analysis skills, process mapping, and improvement techniques for organizational efficiency.",
            "https://www.coursera.org/specializations/business-analysis-process-management",
            "coursera",
        ),
        (
            "Business Analytics Fundamentals",
            "Master data-driven decision making, Excel, SQL, and visualization tools for business analysis.",
            "https://www.udemy.com/course/business-analytics-fundamentals",
            "udemy",
        ),
        (
            "Agile Business Analysis Professional",
            "Learn agile methodologies, user stories, and modern BA practices for software projects.",
            "https://www.coursera.org/professional-certificates/agile-business-analysis",
            "coursera",
        ),
        # General programming / machine-learning courses.
        (
            "Python Programming for Beginners",
            "Learn Python from scratch. Covers basic concepts, data structures, and programming fundamentals.",
            "https://www.udemy.com/course/python-for-beginners",
            "udemy",
        ),
        (
            "Machine Learning Specialization",
            "Comprehensive machine learning course covering supervised learning, neural networks, and practical ML projects.",
            "https://www.coursera.org/specializations/machine-learning",
            "coursera",
        ),
    )
]
def scrape_courses(query, courses=None):
    """Return the catalogue entries that mention any word of *query*.

    Despite the name, no network scraping happens yet: the function filters an
    in-memory catalogue. Real Udemy (``/api-2.0/courses/``) and Coursera
    (``/api/courses.v1``) lookups would need registered API access.
    TODO: when those calls are added, URL-encode the query before building
    the request URL.

    Args:
        query: Free-text search string. It is lower-cased and split on
            whitespace; a course matches when any resulting term is a
            substring of its title or description (case-insensitive).
        courses: Optional iterable of course dicts to search. Defaults to the
            module-level ``SAMPLE_COURSES``.

    Returns:
        A list of the matching course dicts, in catalogue order. Empty when
        nothing matches, when the query has no terms, or when *query* is not
        a string.
    """
    if not isinstance(query, str):
        # Keep the best-effort contract: bad input yields no results, not an exception.
        print(f"Error scraping courses: query must be a string, got {type(query).__name__}")
        return []

    catalog = SAMPLE_COURSES if courses is None else courses
    query_terms = query.lower().split()

    matches = []
    for course in catalog:
        # Lower-case once per course rather than once per term; a missing
        # field is treated as empty so one malformed entry cannot hide the rest.
        title = course.get("title", "").lower()
        description = course.get("description", "").lower()
        if any(term in title or term in description for term in query_terms):
            matches.append(course)
    return matches
# Loaded SentenceTransformer instances keyed by model name, so the weights are
# read once per process instead of once per search request.
_EMBEDDING_MODELS = {}


def _get_embedding_model(model_name="all-MiniLM-L6-v2"):
    """Return the CPU-resident SentenceTransformer for *model_name*, loading it on first use."""
    model = _EMBEDDING_MODELS.get(model_name)
    if model is None:
        model = SentenceTransformer(model_name).to(torch.device("cpu"))
        _EMBEDDING_MODELS[model_name] = model
    return model


def search_courses(query, top_k=5):
    """Rank the courses matching *query* by embedding similarity.

    Candidate courses come from ``scrape_courses``. Each candidate's
    "title description" text and the prompt ``"course about <query>"`` are
    embedded, and candidates are ordered by cosine similarity to the query.

    Args:
        query: Free-text search string.
        top_k: Maximum number of results to return (default 5).

    Returns:
        * A user-facing message (``str``) when the query is blank or not a
          string, or when no candidate course matches.
        * Otherwise a list of dicts with keys ``Title``, ``Description``,
          ``Link``, ``Source`` and ``Relevance`` (similarity as a percentage
          string), best match first.
        * An empty list when an unexpected error occurs; the traceback is
          printed rather than raised.
    """
    if not isinstance(query, str) or not query.strip():
        return "Please enter a search query."

    try:
        relevant_courses = scrape_courses(query)
        if not relevant_courses:
            return "No courses found for your search query."

        model = _get_embedding_model()

        course_texts = [
            f"{course['title']} {course['description']}" for course in relevant_courses
        ]
        course_embeddings = model.encode(course_texts, convert_to_tensor=True)
        query_embedding = model.encode(f"course about {query}", convert_to_tensor=True)

        # Row 0 holds the similarity of the single query to every candidate.
        similarities = util.pytorch_cos_sim(query_embedding, course_embeddings)[0]

        # topk returns scores and indices already sorted best-first; cap k at
        # the number of candidates because topk rejects k > len.
        k = min(top_k, len(relevant_courses))
        scores, indices = torch.topk(similarities, k=k)

        results = []
        for similarity_score, idx in zip(scores.tolist(), indices.tolist()):
            course = relevant_courses[idx]
            results.append({
                "Title": course["title"],
                "Description": course["description"],
                "Link": course["link"],
                "Source": course["source"],
                "Relevance": f"{similarity_score:.2%}",
            })
        return results
    except Exception as e:
        # Top-level boundary for the UI: report and degrade to "no results".
        print(f"Search error: {e}")
        traceback.print_exc()
        return []
def search_interface(query):
    """Gradio callback: run a course search and render the outcome as Markdown.

    String outcomes from ``search_courses`` are passed through unchanged, an
    empty result list becomes a "no matches" message, and a non-empty list is
    rendered as one Markdown section per course. Any exception is printed and
    reported to the user as text instead of being raised.
    """
    try:
        print(f"\nSearching for: {query}")
        outcome = search_courses(query)

        # search_courses signals user-facing conditions with a plain string.
        if isinstance(outcome, str):
            return outcome

        if not outcome:
            return "No matching courses found. Please try a different search term."

        sections = []
        for entry in outcome:
            sections.append(
                f"**Title**: {entry['Title']}\n\n"
                f"**Description**: {entry['Description']}\n\n"
                f"**Source:** {entry['Source']}\n\n"
                f"**Relevance:** {entry['Relevance']}\n\n"
                f"[Go to course]({entry['Link']})"
            )
        return "\n\n".join(sections)
    except Exception as err:
        traceback.print_exc()
        return f"An error occurred: {err}"
# Build the Gradio UI: one text box in, Markdown out, with search_interface
# as the callback and a few example queries offered to the user.
iface = gr.Interface(
    fn=search_interface,
    inputs="text",
    outputs="markdown",
    title="Free Course Search Engine",
    description="Enter a topic or keywords to find relevant free courses from Udemy and Coursera.",
    examples=["Python", "Business Analyst", "Data Science", "Web Development"],
)

# Start the web server; share=True additionally asks Gradio for a public link.
iface.launch(share=True)