Commit
·
7206760
1
Parent(s):
56403af
loading content from public website started
Browse files
kg_builder/src/graph_creation.py
CHANGED
|
@@ -24,6 +24,9 @@ load_dotenv()
|
|
| 24 |
articles = {
|
| 25 |
"Traffic Law": "Traffic laws in the United States"
|
| 26 |
}
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
def build_graph_for_article(query, data_source_name):
|
| 29 |
"""
|
|
@@ -38,10 +41,15 @@ def build_graph_for_article(query, data_source_name):
|
|
| 38 |
chunk_size=400
|
| 39 |
chunk_overlap=10
|
| 40 |
|
| 41 |
-
|
| 42 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
if not raw_documents:
|
| 44 |
-
logger.error(f"Failed to load content for
|
| 45 |
return
|
| 46 |
|
| 47 |
logger.info(f"{str(len(raw_documents))} document(s) loaded from Wikipedia.")
|
|
|
|
| 24 |
articles = {
|
| 25 |
"Traffic Law": "Traffic laws in the United States"
|
| 26 |
}
|
| 27 |
+
#articles = {
|
| 28 |
+
# "SquirroDocs": "https://docs.squirro.com/en/latest/technical/getting-started.html"
|
| 29 |
+
#}
|
| 30 |
|
| 31 |
def build_graph_for_article(query, data_source_name):
|
| 32 |
"""
|
|
|
|
| 41 |
chunk_size=400
|
| 42 |
chunk_overlap=10
|
| 43 |
|
| 44 |
+
if data_source_name == "SquirroDocs":
|
| 45 |
+
logger.info(f"Loading document(s) from public website {query} ...")
|
| 46 |
+
raw_documents = None
|
| 47 |
+
else:
|
| 48 |
+
logger.info(f"Loading document(s) from Wikipedia using query '{query}' ...")
|
| 49 |
+
raw_documents = WikipediaLoader(query=query, load_max_docs=load_max_documents).load()
|
| 50 |
+
|
| 51 |
if not raw_documents:
|
| 52 |
+
logger.error(f"Failed to load content for Data Source '{data_source_name}'!")
|
| 53 |
return
|
| 54 |
|
| 55 |
logger.info(f"{str(len(raw_documents))} document(s) loaded from Wikipedia.")
|