Spaces:
Running
Running
Commit ·
4cf5bcf
1
Parent(s): b8a64cb
Apply the index.html URL special rule only to docs.djangoproject.com
Browse files
- loaders/rtdhtmlpage.py +3 -1
loaders/rtdhtmlpage.py
CHANGED
|
@@ -56,10 +56,12 @@ class RTDHtmlPageLoader(ReadTheDocsLoader):
|
|
| 56 |
for p in self.file_path.rglob("*"):
|
| 57 |
if p.is_dir():
|
| 58 |
continue
|
|
|
|
|
|
|
| 59 |
with open(p, encoding=self.encoding, errors=self.errors) as f:
|
| 60 |
text, title = self._my_clean_data(f.read())
|
| 61 |
|
| 62 |
-
if p.name == "index.html":
|
| 63 |
# Djangoドキュメントではindex.htmlにアクセスすると404になる
|
| 64 |
p = p.parent
|
| 65 |
url = f"https://{str(p)}/"
|
|
|
|
| 56 |
for p in self.file_path.rglob("*"):
|
| 57 |
if p.is_dir():
|
| 58 |
continue
|
| 59 |
+
# FIXME: utf-8を指定したい
|
| 60 |
+
# with open(p, encoding='utf-8', errors='ignore') as f:
|
| 61 |
with open(p, encoding=self.encoding, errors=self.errors) as f:
|
| 62 |
text, title = self._my_clean_data(f.read())
|
| 63 |
|
| 64 |
+
if "docs.djangoproject.com" in p.parts and p.name == "index.html":
|
| 65 |
# Djangoドキュメントではindex.htmlにアクセスすると404になる
|
| 66 |
p = p.parent
|
| 67 |
url = f"https://{str(p)}/"
|