Spaces:
Sleeping
Sleeping
Commit ·
39947ea
1
Parent(s): ed79f19
multiple outputs
Browse files
app.py
CHANGED
|
@@ -95,6 +95,17 @@ def get_documents(courses_texts):
|
|
| 95 |
documents = text_splitter.create_documents(texts = texts, metadatas = metadatas)
|
| 96 |
return documents
|
| 97 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
|
| 99 |
def main():
|
| 100 |
st.title("Analytics Vidhya Course Scraper")
|
|
@@ -105,17 +116,18 @@ def main():
|
|
| 105 |
url = get_domain_link() + "/collections/courses"
|
| 106 |
courses_texts = get_course_details(url)
|
| 107 |
|
|
|
|
| 108 |
documents = get_documents(courses_texts)
|
| 109 |
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
|
| 110 |
db = Chroma.from_documents(documents, embeddings)
|
| 111 |
docs = db.similarity_search(query)
|
| 112 |
|
| 113 |
if docs:
|
| 114 |
-
st.success(f"Found {len(
|
| 115 |
st.write("Course Links:")
|
| 116 |
-
for course in docs:
|
| 117 |
-
st.write(f"
|
| 118 |
-
st.write(f"-
|
| 119 |
else:
|
| 120 |
st.warning("No courses found.")
|
| 121 |
|
|
|
|
| 95 |
documents = text_splitter.create_documents(texts = texts, metadatas = metadatas)
|
| 96 |
return documents
|
| 97 |
|
| 98 |
+
def read_json_data(file_path):
    """Load and return the parsed contents of a JSON file.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The deserialized JSON value (whatever ``json.load`` produces for
        the file), or ``None`` when the file does not exist or does not
        contain valid JSON. In both failure cases an error message is
        printed to stdout.
    """
    try:
        # Decode explicitly as UTF-8 (the JSON interchange encoding) instead
        # of relying on the platform's locale-dependent default encoding.
        with open(file_path, 'r', encoding='utf-8') as file:
            return json.load(file)
    except FileNotFoundError:
        print(f"Error: File not found at {file_path}")
    except json.JSONDecodeError:
        print(f"Error: Invalid JSON format in {file_path}")
    # Both handled failure paths fall through to the same sentinel result.
    return None
|
| 109 |
|
| 110 |
def main():
|
| 111 |
st.title("Analytics Vidhya Course Scraper")
|
|
|
|
| 116 |
url = get_domain_link() + "/collections/courses"
|
| 117 |
courses_texts = get_course_details(url)
|
| 118 |
|
| 119 |
+
courses_texts = read_json_data("/content/course_data.json")
|
| 120 |
documents = get_documents(courses_texts)
|
| 121 |
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
|
| 122 |
db = Chroma.from_documents(documents, embeddings)
|
| 123 |
docs = db.similarity_search(query)
|
| 124 |
|
| 125 |
if docs:
|
| 126 |
+
st.success(f"Found {len(docs)} courses!")
|
| 127 |
st.write("Course Links:")
|
| 128 |
+
for i, course in enumerate(docs):
|
| 129 |
+
st.write(f"{i+1}. {course.metadata["course_name"]}")
|
| 130 |
+
st.write(f" -{course.metadata["link"]}")
|
| 131 |
else:
|
| 132 |
st.warning("No courses found.")
|
| 133 |
|