briankchan committed on
Commit
dfd1845
·
1 Parent(s): 0605bc2

Separate before/after in diff; use better differ

Browse files
Files changed (1) hide show
  1. app.py +81 -35
app.py CHANGED
@@ -9,6 +9,10 @@ from langchain.prompts import PromptTemplate
9
  from langchain.prompts.chat import ChatPromptTemplate, HumanMessagePromptTemplate
10
  from langchain.schema import HumanMessage
11
 
 
 
 
 
12
  def load_chain(api_key, prompt_template):
13
  api_key = openai.api_key if api_key == "" or api_key.isspace() else api_key
14
  if api_key:
@@ -26,29 +30,54 @@ def load_chain(api_key, prompt_template):
26
 
27
  def run(content, chain):
28
  edited = "\n".join([(chain.run(x) if should_check else x) for x, should_check in split_paragraphs(content)])
29
- return diff_words(content, edited), edited
30
 
31
  def split_paragraphs(text):
32
  return [(x, x != "" and not x.startswith("#") and not x.isspace()) for x in text.split("\n")]
33
 
 
 
 
 
 
 
 
 
 
 
 
34
  def diff_words(content, edited):
35
- content = re.split(r'(\s+|[,.!?"\':;]+)', content)
36
- edited = re.split(r'(\s+|[,.!?"\':;]+)', edited)
37
-
38
- output = []
39
- for change, i1, i2, j1, j2 in SequenceMatcher(a=content, b=edited).get_opcodes():
40
- if change == "equal":
41
- output.append((get_parts(content, i1, i2), None))
42
- elif change == "replace":
43
- output.append((get_parts(content, i1, i2) + "→" + get_parts(edited, j1, j2), "→"))
44
- elif change == "delete":
45
- output.append((get_parts(content, i1, i2), "-"))
46
- elif change == "insert":
47
- output.append((get_parts(edited, j1, j2), "+"))
48
  else:
49
- raise Exception("Unknown change type: " + change)
50
-
51
- return output
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
 
53
  def get_parts(arr, start, end):
54
  return "".join(arr[start:end])
@@ -63,17 +92,26 @@ def run2(followup_question, chain1, llm):
63
  CHANGES = {
64
  "-": "remove",
65
  "+": "add",
66
- "→": "change"
67
  }
68
- def select_diff(evt: gr.SelectData):
69
  text, change = evt.value
70
  if not change:
71
  return
72
- if change == "→":
73
- original, edited = text.split("→")
74
- return f"Why {CHANGES[change]} [{original}] to [{edited}]?"
75
- return f"Why {CHANGES[change]} [{text}]?"
76
-
 
 
 
 
 
 
 
 
 
77
 
78
  block = gr.Blocks()
79
  with block:
@@ -95,14 +133,21 @@ with block:
95
  # variant="secondary"
96
  ).style(full_width=False)
97
 
98
- output = gr.HighlightedText(
99
- label="Output",
100
- color_map={
101
- "-": "red",
102
- "+": "green",
103
- "→": "yellow",
104
- }
105
- )
 
 
 
 
 
 
 
106
 
107
  followup_question = gr.Textbox(
108
  label="Follow-up Question",
@@ -114,6 +159,7 @@ with block:
114
  label="Answer"
115
  )
116
 
 
117
  edited = gr.State()
118
  chain = gr.State()
119
  llm = gr.State()
@@ -122,16 +168,16 @@ with block:
122
  prompt.change(load_chain, [api_key, prompt], [chain, llm])
123
 
124
  inputs = [content, chain]
125
- outputs = [output, edited]
126
  content.submit(run, inputs=inputs, outputs=outputs)
127
  submit.click(run, inputs=inputs, outputs=outputs)
128
 
129
- output.select(select_diff, None, followup_question)
 
130
 
131
  inputs2 = [followup_question, chain, llm]
132
  outputs2 = followup_answer
133
  followup_question.submit(run2, inputs2, outputs2)
134
  followup_submit.click(run2, inputs2, outputs2)
135
 
136
-
137
  block.launch(debug=True)
 
9
  from langchain.prompts.chat import ChatPromptTemplate, HumanMessagePromptTemplate
10
  from langchain.schema import HumanMessage
11
 
12
+ from diff_match_patch import diff_match_patch
13
+ import collections
14
+ from itertools import islice
15
+
16
  def load_chain(api_key, prompt_template):
17
  api_key = openai.api_key if api_key == "" or api_key.isspace() else api_key
18
  if api_key:
 
30
 
31
  def run(content, chain):
32
  edited = "\n".join([(chain.run(x) if should_check else x) for x, should_check in split_paragraphs(content)])
33
+ return diff_words(content, edited) + (edited,)
34
 
35
  def split_paragraphs(text):
36
  return [(x, x != "" and not x.startswith("#") and not x.isspace()) for x in text.split("\n")]
37
 
38
def sliding_window(iterable, n):
    """Yield successive overlapping tuples of length ``n`` from ``iterable``.

    Adapted from the Python itertools recipes, e.g.
    ``sliding_window('ABCDEFG', 4)`` --> ``ABCD BCDE CDEF DEFG``.
    Nothing is yielded when ``iterable`` has fewer than ``n`` items.
    """
    source = iter(iterable)
    buffer = collections.deque(islice(source, n), maxlen=n)
    if len(buffer) != n:
        # Input ran out before the first full window could be formed.
        return
    yield tuple(buffer)
    for item in source:
        # maxlen=n makes append() drop the oldest element automatically.
        buffer.append(item)
        yield tuple(buffer)
47
+
48
# Shared differ instance used by diff_words.
dmp = diff_match_patch()


def diff_words(content, edited):
    """Diff ``content`` against ``edited`` and build separate before/after views.

    Args:
        content: The original text.
        edited: The revised text.

    Returns:
        A ``(before, after, changes)`` tuple. ``before`` and ``after`` are
        lists of ``(text, label)`` pairs. The label is ``None`` for unchanged
        text, ``"-"`` for a stand-alone deletion (``before`` only), ``"+"`` for
        a stand-alone insertion (``after`` only), or the 1-based replacement
        number as a string, shared by the matching entries in both lists.
        ``changes`` holds the ``(original, replacement)`` text of every
        numbered replacement, in order.

    Raises:
        ValueError: If the differ reports an operation other than -1, 0 or 1.
    """
    before = []
    after = []
    changes = []
    # Set after a delete+insert pair is consumed so the insert half is not
    # emitted a second time when the window advances onto it.
    skip_next_insert = False

    diff = dmp.diff_main(content, edited)
    dmp.diff_cleanupSemantic(diff)  # return value unused; relies on in-place cleanup
    # Sentinel so the last real entry is still paired with a "next" entry.
    diff.append((None, None))

    for (change, text), (next_change, next_text) in sliding_window(diff, 2):
        if change == 0:
            before.append((text, None))
            after.append((text, None))
        elif change == -1 and next_change == 1:
            # A deletion immediately followed by an insertion is a replacement.
            label = str(len(changes) + 1)
            before.append((text, label))
            after.append((next_text, label))
            changes.append((text, next_text))
            skip_next_insert = True
        elif change == -1:
            before.append((text, "-"))
        elif change == 1:
            if skip_next_insert:
                skip_next_insert = False
            else:
                after.append((text, "+"))
        else:
            # The operation code is not a string, so it must be formatted
            # rather than concatenated onto the message.
            raise ValueError(f"Unknown change type: {change!r}")

    return before, after, changes
81
 
82
  def get_parts(arr, start, end):
83
  return "".join(arr[start:end])
 
92
  CHANGES = {
93
  "-": "remove",
94
  "+": "add",
95
+ # "β†’": "change"
96
  }
97
+ def select_diff(evt: gr.SelectData, changes):
98
  text, change = evt.value
99
  if not change:
100
  return
101
+ change_text = CHANGES.get(change, None)
102
+ if change_text:
103
+ return f"Why is it better to {change_text} [{text}]?"
104
+ # if change == "β†’":
105
+ else:
106
+ # clicked = evt.target
107
+ # if clicked.label == "Before":
108
+ # original = text
109
+ # else:
110
+ # edited = text
111
+
112
+ original, edited = changes[int(change) - 1]
113
+ # original, edited = text.split("β†’")
114
+ return f"Why is it better to change [{original}] to [{edited}]?"
115
 
116
  block = gr.Blocks()
117
  with block:
 
133
  # variant="secondary"
134
  ).style(full_width=False)
135
 
136
+ with gr.Row():
137
+ output_before = gr.HighlightedText(
138
+ label="Before",
139
+ color_map={
140
+ "-": "red",
141
+ # "→": "yellow",
142
+ }
143
+ )
144
+ output_after = gr.HighlightedText(
145
+ label="After",
146
+ color_map={
147
+ "+": "green",
148
+ # "→": "yellow",
149
+ }
150
+ )
151
 
152
  followup_question = gr.Textbox(
153
  label="Follow-up Question",
 
159
  label="Answer"
160
  )
161
 
162
+ changes = gr.State()
163
  edited = gr.State()
164
  chain = gr.State()
165
  llm = gr.State()
 
168
  prompt.change(load_chain, [api_key, prompt], [chain, llm])
169
 
170
  inputs = [content, chain]
171
+ outputs = [output_before, output_after, changes, edited]
172
  content.submit(run, inputs=inputs, outputs=outputs)
173
  submit.click(run, inputs=inputs, outputs=outputs)
174
 
175
+ output_before.select(select_diff, changes, followup_question)
176
+ output_after.select(select_diff, changes, followup_question)
177
 
178
  inputs2 = [followup_question, chain, llm]
179
  outputs2 = followup_answer
180
  followup_question.submit(run2, inputs2, outputs2)
181
  followup_submit.click(run2, inputs2, outputs2)
182
 
 
183
  block.launch(debug=True)