Spaces:
Runtime error
Runtime error
Adrien
committed on
Commit
·
bccb279
1
Parent(s):
3823e3e
clean up
Browse files
rag_demo/rag/source_annotator.py
CHANGED
|
@@ -11,6 +11,7 @@ from transformers import pipeline
|
|
| 11 |
|
| 12 |
class SourceAnnotator:
|
| 13 |
def __init__(self):
|
|
|
|
| 14 |
self.source_annotator = pipeline(
|
| 15 |
"question-answering",
|
| 16 |
model="distilbert/distilbert-base-cased-distilled-squad",
|
|
@@ -22,7 +23,7 @@ class SourceAnnotator:
|
|
| 22 |
for sentence in sentences:
|
| 23 |
scores = []
|
| 24 |
for chunk in reranked_chunks:
|
| 25 |
-
score = self.
|
| 26 |
score["filename"] = chunk.metadata["filename"].split(".pdf")[0]
|
| 27 |
score["chunk_id"] = chunk.chunk_id
|
| 28 |
scores.append(score)
|
|
@@ -38,6 +39,3 @@ class SourceAnnotator:
|
|
| 38 |
pattern = r"(?<=[.!?])\s+(?=[A-Z])"
|
| 39 |
sentences = re.split(pattern, text)
|
| 40 |
return [s.strip() for s in sentences if s.strip()]
|
| 41 |
-
|
| 42 |
-
def annotate_source(self, text: str, chunk: str) -> dict:
|
| 43 |
-
return self.source_annotator(text, chunk)
|
|
|
|
| 11 |
|
| 12 |
class SourceAnnotator:
|
| 13 |
def __init__(self):
|
| 14 |
+
# Extractive question answering model
|
| 15 |
self.source_annotator = pipeline(
|
| 16 |
"question-answering",
|
| 17 |
model="distilbert/distilbert-base-cased-distilled-squad",
|
|
|
|
| 23 |
for sentence in sentences:
|
| 24 |
scores = []
|
| 25 |
for chunk in reranked_chunks:
|
| 26 |
+
score = self.source_annotator(sentence.lower(), chunk.content.lower())
|
| 27 |
score["filename"] = chunk.metadata["filename"].split(".pdf")[0]
|
| 28 |
score["chunk_id"] = chunk.chunk_id
|
| 29 |
scores.append(score)
|
|
|
|
| 39 |
pattern = r"(?<=[.!?])\s+(?=[A-Z])"
|
| 40 |
sentences = re.split(pattern, text)
|
| 41 |
return [s.strip() for s in sentences if s.strip()]
|
|
|
|
|
|
|
|
|