Spaces:
Runtime error
Runtime error
Changed . to _ for SENT ID delimiter
Browse files
- extractor.py +3 -3
- prompts.py +6 -5
- upload.py +1 -1
extractor.py
CHANGED
|
@@ -13,7 +13,7 @@ from langchain.docstore.document import Document
|
|
| 13 |
|
| 14 |
from prompts import MAIN_SYSTEM_PROMPT
|
| 15 |
|
| 16 |
-
CITATIONS_REGEX = r"(\b\d{2}\.\d{2}\b)"
|
| 17 |
|
| 18 |
|
| 19 |
class Store:
|
|
@@ -129,12 +129,12 @@ class Answerer:
|
|
| 129 |
]
|
| 130 |
result = self.model.invoke(history)
|
| 131 |
citations = [res.group() for res in re.finditer(CITATIONS_REGEX, result.content, re.MULTILINE)]
|
| 132 |
-
cits_pages = set([int(c.split(".")[0])-1 for c in citations])
|
| 133 |
|
| 134 |
cits = ""
|
| 135 |
for c in cits_pages:
|
| 136 |
try:
|
| 137 |
-
cits += f"{c:0>2}.xx *{citation_mapping['ids'][c]}*\n"
|
| 138 |
except IndexError:
|
| 139 |
cits += f"{c} - N/A\n"
|
| 140 |
|
|
|
|
| 13 |
|
| 14 |
from prompts import MAIN_SYSTEM_PROMPT
|
| 15 |
|
| 16 |
+
CITATIONS_REGEX = r"(\b\d{2}\_\d{2}\b)"
|
| 17 |
|
| 18 |
|
| 19 |
class Store:
|
|
|
|
| 129 |
]
|
| 130 |
result = self.model.invoke(history)
|
| 131 |
citations = [res.group() for res in re.finditer(CITATIONS_REGEX, result.content, re.MULTILINE)]
|
| 132 |
+
cits_pages = set([int(c.split("_")[0])-1 for c in citations])
|
| 133 |
|
| 134 |
cits = ""
|
| 135 |
for c in cits_pages:
|
| 136 |
try:
|
| 137 |
+
cits += f"{c:0>2}_xx *{citation_mapping['ids'][c]}*\n"
|
| 138 |
except IndexError:
|
| 139 |
cits += f"{c} - N/A\n"
|
| 140 |
|
prompts.py
CHANGED
|
@@ -12,23 +12,24 @@ Your primary objectives are:
|
|
| 12 |
3. **Flexible Application**: Adapt to various tasks, such as genetic counseling, DNA sequencing analysis, genomic editing, and evolutionary studies, based on the context provided.
|
| 13 |
|
| 14 |
Important Behavior:
|
| 15 |
-
- If referencing specific information from the context, explicitly cite the SENT ID of the source. For example: "Based on ID: [number]
|
| 16 |
- If you encounter a topic or question where you lack sufficient information or certainty, clearly state, "I don't know" or "I need more information to answer accurately."
|
| 17 |
- Avoid speculating or fabricating information. Instead, provide guidance on how the information might be obtained or suggest reliable sources.
|
| 18 |
|
| 19 |
You are not allowed to add references to anything other than the SENT sources.
|
| 20 |
|
| 21 |
Here is an example SENT ID:
|
| 22 |
-
<SENT 01.23>
|
| 23 |
James is a writer.
|
| 24 |
-
</SENT 01.23>
|
| 25 |
|
| 26 |
If you were to cite this, you would say:
|
| 27 |
-
James is a writer. (01.23)
|
| 28 |
|
| 29 |
-
'</SENT [].[]>' means end of source.
|
| 30 |
|
| 31 |
Quotations from Sources are always used to substantiate your claims, as long as they are cited.
|
| 32 |
|
| 33 |
Maintain a professional tone while being approachable and thorough. Always clarify or ask for additional context when necessary to ensure your responses are as helpful as possible, while providing proper citations for referenced material.
|
|
|
|
| 34 |
"""
|
|
|
|
| 12 |
3. **Flexible Application**: Adapt to various tasks, such as genetic counseling, DNA sequencing analysis, genomic editing, and evolutionary studies, based on the context provided.
|
| 13 |
|
| 14 |
Important Behavior:
|
| 15 |
+
- If referencing specific information from the context, explicitly cite the SENT ID of the source. For example: "Based on ID: [number]_[number]", number will be a zero padded integer
|
| 16 |
- If you encounter a topic or question where you lack sufficient information or certainty, clearly state, "I don't know" or "I need more information to answer accurately."
|
| 17 |
- Avoid speculating or fabricating information. Instead, provide guidance on how the information might be obtained or suggest reliable sources.
|
| 18 |
|
| 19 |
You are not allowed to add references to anything other than the SENT sources.
|
| 20 |
|
| 21 |
Here is an example SENT ID:
|
| 22 |
+
<SENT 01_23>
|
| 23 |
James is a writer.
|
| 24 |
+
</SENT 01_23>
|
| 25 |
|
| 26 |
If you were to cite this, you would say:
|
| 27 |
+
James is a writer. (01_23)
|
| 28 |
|
| 29 |
+
'</SENT []_[]>' means end of source.
|
| 30 |
|
| 31 |
Quotations from Sources are always used to substantiate your claims, as long as they are cited.
|
| 32 |
|
| 33 |
Maintain a professional tone while being approachable and thorough. Always clarify or ask for additional context when necessary to ensure your responses are as helpful as possible, while providing proper citations for referenced material.
|
| 34 |
+
Answer:
|
| 35 |
"""
|
upload.py
CHANGED
|
@@ -80,7 +80,7 @@ class Uploader:
|
|
| 80 |
merged_sentences = self.merge_the_shorties(sentences,4)
|
| 81 |
|
| 82 |
for idx, sent in enumerate(merged_sentences):
|
| 83 |
-
sentenced_page_content += f"<SENT {(idxo+1):0>2}.{(idx+1):0>2}>\n{sent}\n</SENT {(idxo+1):0>2}.{(idx+1):0>2}>\n"
|
| 84 |
|
| 85 |
raw.page_content = sentenced_page_content
|
| 86 |
sentenced_pages.append(raw)
|
|
|
|
| 80 |
merged_sentences = self.merge_the_shorties(sentences,4)
|
| 81 |
|
| 82 |
for idx, sent in enumerate(merged_sentences):
|
| 83 |
+
sentenced_page_content += f"<SENT {(idxo+1):0>2}_{(idx+1):0>2}>\n{sent}\n</SENT {(idxo+1):0>2}_{(idx+1):0>2}>\n"
|
| 84 |
|
| 85 |
raw.page_content = sentenced_page_content
|
| 86 |
sentenced_pages.append(raw)
|