Update RepoPipeline.py
Browse files: RepoPipeline.py +4 -4
RepoPipeline.py
CHANGED
|
@@ -127,12 +127,12 @@ def extract_information(repos, headers=None):
|
|
| 127 |
except SyntaxError as e:
|
| 128 |
tqdm.write(f"[-] SyntaxError in {member.name}, skipping: \n{e}")
|
| 129 |
# 3. Extracting readme.
|
| 130 |
-
elif ((member.name
|
| 131 |
try:
|
| 132 |
file_content = tar.extractfile(member).read().decode("utf-8")
|
| 133 |
# extract readme
|
| 134 |
-
|
| 135 |
-
repo_info["readmes"].update(
|
| 136 |
except UnicodeDecodeError as e:
|
| 137 |
tqdm.write(
|
| 138 |
f"[-] UnicodeDecodeError in {member.name}, skipping: \n{e}"
|
|
@@ -140,7 +140,7 @@ def extract_information(repos, headers=None):
|
|
| 140 |
except SyntaxError as e:
|
| 141 |
tqdm.write(f"[-] SyntaxError in {member.name}, skipping: \n{e}")
|
| 142 |
# 4. Extracting requirements.
|
| 143 |
-
elif (member.name
|
| 144 |
try:
|
| 145 |
lines = tar.extractfile(member).readlines().decode("utf-8")
|
| 146 |
# extract readme
|
|
|
|
| 127 |
except SyntaxError as e:
|
| 128 |
tqdm.write(f"[-] SyntaxError in {member.name}, skipping: \n{e}")
|
| 129 |
# 3. Extracting readme.
|
| 130 |
+
elif ((member.name is "README.md" or member.name is "README.rst") and member.isfile()) is True:
|
| 131 |
try:
|
| 132 |
file_content = tar.extractfile(member).read().decode("utf-8")
|
| 133 |
# extract readme
|
| 134 |
+
readmes_set = set(file_content)
|
| 135 |
+
repo_info["readmes"].update(readmes_set)
|
| 136 |
except UnicodeDecodeError as e:
|
| 137 |
tqdm.write(
|
| 138 |
f"[-] UnicodeDecodeError in {member.name}, skipping: \n{e}"
|
|
|
|
| 140 |
except SyntaxError as e:
|
| 141 |
tqdm.write(f"[-] SyntaxError in {member.name}, skipping: \n{e}")
|
| 142 |
# 4. Extracting requirements.
|
| 143 |
+
elif (member.name is "requirements.txt" and member.isfile()) is True:
|
| 144 |
try:
|
| 145 |
lines = tar.extractfile(member).readlines().decode("utf-8")
|
| 146 |
# extract readme
|