samwoof committed on
Commit
33e787b
·
1 Parent(s): 3f92f3c

Changed . to _ for SENT ID delimiter

Browse files
Files changed (3) hide show
  1. extractor.py +3 -3
  2. prompts.py +6 -5
  3. upload.py +1 -1
extractor.py CHANGED
@@ -13,7 +13,7 @@ from langchain.docstore.document import Document
13
 
14
  from prompts import MAIN_SYSTEM_PROMPT
15
 
16
- CITATIONS_REGEX = r"(\b\d{2}\.\d{2}\b)"
17
 
18
 
19
  class Store:
@@ -129,12 +129,12 @@ class Answerer:
129
  ]
130
  result = self.model.invoke(history)
131
  citations = [res.group() for res in re.finditer(CITATIONS_REGEX, result.content, re.MULTILINE)]
132
- cits_pages = set([int(c.split(".")[0])-1 for c in citations])
133
 
134
  cits = ""
135
  for c in cits_pages:
136
  try:
137
- cits += f"{c:0>2}.xx*{citation_mapping['ids'][c]}*\n"
138
  except IndexError:
139
  cits += f"{c} - N/A\n"
140
 
 
13
 
14
  from prompts import MAIN_SYSTEM_PROMPT
15
 
16
+ CITATIONS_REGEX = r"(\b\d{2}\_\d{2}\b)"
17
 
18
 
19
  class Store:
 
129
  ]
130
  result = self.model.invoke(history)
131
  citations = [res.group() for res in re.finditer(CITATIONS_REGEX, result.content, re.MULTILINE)]
132
+ cits_pages = set([int(c.split("_")[0])-1 for c in citations])
133
 
134
  cits = ""
135
  for c in cits_pages:
136
  try:
137
+ cits += f"{c:0>2}_xx *{citation_mapping['ids'][c]}*\n"
138
  except IndexError:
139
  cits += f"{c} - N/A\n"
140
 
prompts.py CHANGED
@@ -12,23 +12,24 @@ Your primary objectives are:
12
  3. **Flexible Application**: Adapt to various tasks, such as genetic counseling, DNA sequencing analysis, genomic editing, and evolutionary studies, based on the context provided.
13
 
14
  Important Behavior:
15
- - If referencing specific information from the context, explicitly cite the SENT ID of the source. For example: "Based on ID: [number].[number]", number will be a zero padded integer
16
  - If you encounter a topic or question where you lack sufficient information or certainty, clearly state, "I don't know" or "I need more information to answer accurately."
17
  - Avoid speculating or fabricating information. Instead, provide guidance on how the information might be obtained or suggest reliable sources.
18
 
19
  You are not allowed to add references to anything other than the SENT sources.
20
 
21
  Here is an example SENT ID:
22
- <SENT 01.23>
23
  James is a writer.
24
- </SENT 01.23>
25
 
26
  If you were to cite this, you would say:
27
- James is a writer. (01.23)
28
 
29
- '</SENT [].[]>' means end of source.
30
 
31
  Quotations from Sources are always used to substantiate your claims, as long as they are cited.
32
 
33
  Maintain a professional tone while being approachable and thorough. Always clarify or ask for additional context when necessary to ensure your responses are as helpful as possible, while providing proper citations for referenced material.
 
34
  """
 
12
  3. **Flexible Application**: Adapt to various tasks, such as genetic counseling, DNA sequencing analysis, genomic editing, and evolutionary studies, based on the context provided.
13
 
14
  Important Behavior:
15
+ - If referencing specific information from the context, explicitly cite the SENT ID of the source. For example: "Based on ID: [number]_[number]", number will be a zero padded integer
16
  - If you encounter a topic or question where you lack sufficient information or certainty, clearly state, "I don't know" or "I need more information to answer accurately."
17
  - Avoid speculating or fabricating information. Instead, provide guidance on how the information might be obtained or suggest reliable sources.
18
 
19
  You are not allowed to add references to anything other than the SENT sources.
20
 
21
  Here is an example SENT ID:
22
+ <SENT 01_23>
23
  James is a writer.
24
+ </SENT 01_23>
25
 
26
  If you were to cite this, you would say:
27
+ James is a writer. (01_23)
28
 
29
+ '</SENT []_[]>' means end of source.
30
 
31
  Quotations from Sources are always used to substantiate your claims, as long as they are cited.
32
 
33
  Maintain a professional tone while being approachable and thorough. Always clarify or ask for additional context when necessary to ensure your responses are as helpful as possible, while providing proper citations for referenced material.
34
+ Answer:
35
  """
upload.py CHANGED
@@ -80,7 +80,7 @@ class Uploader:
80
  merged_sentences = self.merge_the_shorties(sentences,4)
81
 
82
  for idx, sent in enumerate(merged_sentences):
83
- sentenced_page_content += f"<SENT {(idxo+1):0>2}.{(idx+1):0>2}>\n{sent}\n</SENT {(idxo+1):0>2}.{(idx+1):0>2}>\n"
84
 
85
  raw.page_content = sentenced_page_content
86
  sentenced_pages.append(raw)
 
80
  merged_sentences = self.merge_the_shorties(sentences,4)
81
 
82
  for idx, sent in enumerate(merged_sentences):
83
+ sentenced_page_content += f"<SENT {(idxo+1):0>2}_{(idx+1):0>2}>\n{sent}\n</SENT {(idxo+1):0>2}_{(idx+1):0>2}>\n"
84
 
85
  raw.page_content = sentenced_page_content
86
  sentenced_pages.append(raw)