Update RepoPipeline.py
Browse files - RepoPipeline.py +3 -3
RepoPipeline.py
CHANGED
|
@@ -113,7 +113,7 @@ def extract_information(repos, headers=None):
|
|
| 113 |
with tarfile.open(fileobj=response.raw, mode="r|gz") as tar:
|
| 114 |
for member in tar:
|
| 115 |
# 2. Extracting codes and docs.
|
| 116 |
-
if
|
| 117 |
try:
|
| 118 |
file_content = tar.extractfile(member).read().decode("utf-8")
|
| 119 |
# extract_code_and_docs
|
|
@@ -127,7 +127,7 @@ def extract_information(repos, headers=None):
|
|
| 127 |
except SyntaxError as e:
|
| 128 |
tqdm.write(f"[-] SyntaxError in {member.name}, skipping: \n{e}")
|
| 129 |
# 3. Extracting readme.
|
| 130 |
-
elif (
|
| 131 |
try:
|
| 132 |
file_content = tar.extractfile(member).read().decode("utf-8")
|
| 133 |
# extract readme
|
|
@@ -141,7 +141,7 @@ def extract_information(repos, headers=None):
|
|
| 141 |
except SyntaxError as e:
|
| 142 |
tqdm.write(f"[-] SyntaxError in {member.name}, skipping: \n{e}")
|
| 143 |
# 4. Extracting requirements.
|
| 144 |
-
elif
|
| 145 |
try:
|
| 146 |
lines = tar.extractfile(member).readlines().decode("utf-8")
|
| 147 |
# extract requirements
|
|
|
|
| 113 |
with tarfile.open(fileobj=response.raw, mode="r|gz") as tar:
|
| 114 |
for member in tar:
|
| 115 |
# 2. Extracting codes and docs.
|
| 116 |
+
if member.name.endswith(".py") and member.isfile():
|
| 117 |
try:
|
| 118 |
file_content = tar.extractfile(member).read().decode("utf-8")
|
| 119 |
# extract_code_and_docs
|
|
|
|
| 127 |
except SyntaxError as e:
|
| 128 |
tqdm.write(f"[-] SyntaxError in {member.name}, skipping: \n{e}")
|
| 129 |
# 3. Extracting readme.
|
| 130 |
+
elif (member.name == "README.md" or member.name == "README.rst") and member.isfile():
|
| 131 |
try:
|
| 132 |
file_content = tar.extractfile(member).read().decode("utf-8")
|
| 133 |
# extract readme
|
|
|
|
| 141 |
except SyntaxError as e:
|
| 142 |
tqdm.write(f"[-] SyntaxError in {member.name}, skipping: \n{e}")
|
| 143 |
# 4. Extracting requirements.
|
| 144 |
+
elif member.name == "requirements.txt" and member.isfile():
|
| 145 |
try:
|
| 146 |
lines = tar.extractfile(member).readlines().decode("utf-8")
|
| 147 |
# extract requirements
|