Spaces:
Sleeping
Sleeping
Commit · b76b731
1
Parent(s): 44e5a43
updated requirements.txt
Browse files
- app.py +4 -5
- requirements.txt +3 -1
app.py
CHANGED
|
@@ -3,8 +3,6 @@ from bs4 import BeautifulSoup
|
|
| 3 |
from langchain_chroma import Chroma
|
| 4 |
from langchain.embeddings import HuggingFaceEmbeddings
|
| 5 |
from langchain_text_splitters import CharacterTextSplitter
|
| 6 |
-
import os
|
| 7 |
-
import sentence_transformers
|
| 8 |
import json
|
| 9 |
import streamlit as st
|
| 10 |
|
|
@@ -80,9 +78,10 @@ def get_course_details(url):
|
|
| 80 |
json.dump(course_texts, f, indent=4)
|
| 81 |
return course_texts
|
| 82 |
|
| 83 |
-
def get_documents(course_texts):
|
| 84 |
texts = []
|
| 85 |
metadatas = []
|
|
|
|
| 86 |
for course_text in course_texts:
|
| 87 |
texts.append(course_text["text"])
|
| 88 |
metadatas.append({
|
|
@@ -109,12 +108,12 @@ def read_json_data(file_path):
|
|
| 109 |
def main():
|
| 110 |
st.title("Analytics Vidhya Course Scraper")
|
| 111 |
url = get_domain_link() + "/collections/courses"
|
| 112 |
-
courses_texts = get_course_details(url)
|
| 113 |
query = st.text_input("What do you want to learn today", value="Large language models")
|
| 114 |
|
| 115 |
if st.button("Fetch Courses"):
|
| 116 |
st.info("Fetching courses please wait...")
|
| 117 |
-
courses_texts = read_json_data("
|
| 118 |
documents = get_documents(courses_texts)
|
| 119 |
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
|
| 120 |
db = Chroma.from_documents(documents, embeddings)
|
|
|
|
| 3 |
from langchain_chroma import Chroma
|
| 4 |
from langchain.embeddings import HuggingFaceEmbeddings
|
| 5 |
from langchain_text_splitters import CharacterTextSplitter
|
|
|
|
|
|
|
| 6 |
import json
|
| 7 |
import streamlit as st
|
| 8 |
|
|
|
|
| 78 |
json.dump(course_texts, f, indent=4)
|
| 79 |
return course_texts
|
| 80 |
|
| 81 |
+
def get_documents(course_texts:list):
|
| 82 |
texts = []
|
| 83 |
metadatas = []
|
| 84 |
+
print("course_texts",course_texts)
|
| 85 |
for course_text in course_texts:
|
| 86 |
texts.append(course_text["text"])
|
| 87 |
metadatas.append({
|
|
|
|
| 108 |
def main():
|
| 109 |
st.title("Analytics Vidhya Course Scraper")
|
| 110 |
url = get_domain_link() + "/collections/courses"
|
| 111 |
+
# courses_texts = get_course_details(url)
|
| 112 |
query = st.text_input("What do you want to learn today", value="Large language models")
|
| 113 |
|
| 114 |
if st.button("Fetch Courses"):
|
| 115 |
st.info("Fetching courses please wait...")
|
| 116 |
+
courses_texts = read_json_data("content.json")
|
| 117 |
documents = get_documents(courses_texts)
|
| 118 |
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
|
| 119 |
db = Chroma.from_documents(documents, embeddings)
|
requirements.txt
CHANGED
|
@@ -6,4 +6,6 @@ langchain-community
|
|
| 6 |
langchain-text-splitters
|
| 7 |
langchain-huggingface
|
| 8 |
python-dotenv
|
| 9 |
-
|
|
|
|
|
|
|
|
|
| 6 |
langchain-text-splitters
|
| 7 |
langchain-huggingface
|
| 8 |
python-dotenv
|
| 9 |
+
sentence-transformers
|
| 10 |
+
streamlit
|
| 11 |
+
torch
|