Kevin Hu
committed on
Commit
·
b80e2f3
1
Parent(s):
d73a5e2
Fix csv for TAG. (#4454)
Browse files

### What problem does this PR solve?
### Type of change
- [x] Bug Fix (non-breaking change which fixes an issue)
- rag/app/tag.py +2 -2
rag/app/tag.py
CHANGED
|
@@ -91,14 +91,14 @@ def chunk(filename, binary=None, lang="Chinese", callback=None, **kwargs):
|
|
| 91 |
callback(0.1, "Start to parse.")
|
| 92 |
txt = get_text(filename, binary)
|
| 93 |
lines = txt.split("\n")
|
| 94 |
-
delimiter = "\t" if any("\t" in line for line in lines) else ","
|
| 95 |
|
| 96 |
fails = []
|
| 97 |
content = ""
|
| 98 |
res = []
|
| 99 |
-
reader = csv.reader(lines
|
| 100 |
|
| 101 |
for i, row in enumerate(reader):
|
|
|
|
| 102 |
if len(row) != 2:
|
| 103 |
content += "\n" + lines[i]
|
| 104 |
elif len(row) == 2:
|
|
|
|
| 91 |
callback(0.1, "Start to parse.")
|
| 92 |
txt = get_text(filename, binary)
|
| 93 |
lines = txt.split("\n")
|
|
|
|
| 94 |
|
| 95 |
fails = []
|
| 96 |
content = ""
|
| 97 |
res = []
|
| 98 |
+
reader = csv.reader(lines)
|
| 99 |
|
| 100 |
for i, row in enumerate(reader):
|
| 101 |
+
row = [r.strip() for r in row if r.strip()]
|
| 102 |
if len(row) != 2:
|
| 103 |
content += "\n" + lines[i]
|
| 104 |
elif len(row) == 2:
|