Create README.md
#1
by
Corianas
- opened
README.md
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: apache-2.0
|
| 3 |
+
language:
|
| 4 |
+
- en
|
| 5 |
+
---
|
| 6 |
+
This is a ReAct-style model.
|
| 7 |
+
|
| 8 |
+
The dataset was parsed with:
|
| 9 |
+
```
|
| 10 |
+
def extract_trajectory_info(data):
    """
    Extracts the question, thoughts, actions, and observations from the trajectory field of the data.

    Steps are located by their numbered markers ("Thought N:", "Action N:",
    "Observation N:"). A field ends at the next numbered marker of the
    following kind (or at the end of the text), so a bare word such as
    "Action" or "Thought" inside a field does not cut it short.

    Parameters:
    data (dict): The data entry containing the 'question' and 'trajectory' fields.

    Returns:
    dict: A dictionary with the keys 'question', 'thoughts', 'actions', and
    'observations'. The last three are lists of whitespace-stripped strings
    in order of appearance; they are empty when the trajectory is missing,
    None, or empty.
    """
    # Local import so the snippet runs on its own when copied out of the README.
    import re

    # Extracting the question
    question = data.get('question', '')

    # Read the trajectory once; treat a missing or None value as empty text.
    trajectory = data.get('trajectory') or ''

    def _collect(label, next_label):
        # Lazily capture everything after "<label> N: " up to the next
        # "<next_label> N:" marker or the end of the text. DOTALL lets a
        # field span several lines.
        pattern = rf'{label} \d+: (.+?)(?={next_label} \d+:|\Z)'
        return [match.strip() for match in re.findall(pattern, trajectory, re.DOTALL)]

    return {
        'question': question,
        'thoughts': _collect('Thought', 'Action'),
        'actions': _collect('Action', 'Observation'),
        'observations': _collect('Observation', 'Thought'),
    }
|
| 39 |
+
# Sample usage: parse the first training example
|
| 40 |
+
extracted_info = extract_trajectory_info(ds["train"][0])
|
| 41 |
+
```
|
| 42 |
+
It was then rebuilt into a new dataset with:
|
| 43 |
+
```
|
| 44 |
+
# Tool descriptions that are prepended to every generated prompt
|
| 45 |
+
preamble = """Tools available:
|
| 46 |
+
(1) Search[entity], which searches the exact entity on Wikipedia and returns the first paragraph if it exists. If not, it will return some similar entities to search.
|
| 47 |
+
(2) Lookup[keyword], which returns the next sentence containing the keyword in the current passage.
|
| 48 |
+
(3) Finish[answer], which returns the answer and finishes the task.
|
| 49 |
+
"""
|
| 50 |
+
dataset = []
|
| 51 |
+
# Iterate through every example in the training set
|
| 52 |
+
for i in range(len(ds['train'])):
|
| 53 |
+
extracted_info = extract_trajectory_info(ds['train'][i])
|
| 54 |
+
|
| 55 |
+
# Iterate through each thought in the extracted information
|
| 56 |
+
for j in range(len(extracted_info['thoughts'])):
|
| 57 |
+
out = f"{preamble}---\nQuestion: {extracted_info['question']}\n"
|
| 58 |
+
prev = ""
|
| 59 |
+
# First step: there are no earlier steps to include
|
| 60 |
+
if j == 0:
|
| 61 |
+
out += f"Thought: {extracted_info['thoughts'][0]}\n"
|
| 62 |
+
out += f"Action: {extracted_info['actions'][0]}\nPAUSE\n\n\n\n"
|
| 63 |
+
|
| 64 |
+
else:
|
| 65 |
+
for k in range(1, j + 1):
|
| 66 |
+
# Index j - k runs from j - 1 down to 0, so earlier steps are added most-recent-first
# NOTE(review): "Thought:" below has no space after the colon, unlike "Thought: " elsewhere - confirm this is intended
|
| 67 |
+
prev += f"Thought:{extracted_info['thoughts'][j - k]}\n"
|
| 68 |
+
prev += f"Action: {extracted_info['actions'][j - k]}\nPAUSE\n"
|
| 69 |
+
|
| 70 |
+
prev += f"Observation: {extracted_info['observations'][j - k]}\n"
|
| 71 |
+
|
| 72 |
+
out += prev # Append the earlier steps collected above (nothing is stripped here)
|
| 73 |
+
out += f"---\nThought: {extracted_info['thoughts'][j]}\n"
|
| 74 |
+
out += f"Action: {extracted_info['actions'][j]}\nPAUSE\n\n\n\n"
|
| 75 |
+
|
| 76 |
+
# Print the constructed output
|
| 77 |
+
print(out)
|
| 78 |
+
dataset.append(out)
|
| 79 |
+
#print(len(out))
|
| 80 |
+
```
|