Spaces:
Sleeping
Sleeping
wuyiqunLu
committed on
feat: integrate vision agent v3 for logs and code (#51)
Browse files
What it looks like for V2:
https://github.com/landing-ai/vision-agent/assets/132986242/95eb4605-0d37-402c-8401-b843bfd769ad
V3:
https://github.com/landing-ai/vision-agent/assets/132986242/4728641e-a9a7-49eb-81e7-2a5eff155224
- app/api/vision-agent/route.ts +65 -4
- components/chat/ChatMessage.tsx +76 -100
- lib/messageUtils.ts +34 -38
app/api/vision-agent/route.ts
CHANGED
|
@@ -57,7 +57,7 @@ export const POST = withLogging(
|
|
| 57 |
|
| 58 |
const fetchResponse = await fetch(
|
| 59 |
`https://api.dev.landing.ai/v1/agent/chat?agent_class=vision_agent&visualize_output=true&self_reflection=${enableSelfReflection}`,
|
| 60 |
-
// `http://localhost:5001/v1/agent/chat?agent_class=vision_agent&
|
| 61 |
{
|
| 62 |
method: 'POST',
|
| 63 |
headers: {
|
|
@@ -114,9 +114,70 @@ export const POST = withLogging(
|
|
| 114 |
const stream = fetchResponse.body.pipeThrough(
|
| 115 |
new TransformStream({
|
| 116 |
transform: async (chunk, controller) => {
|
| 117 |
-
const
|
| 118 |
-
|
| 119 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
},
|
| 121 |
}),
|
| 122 |
);
|
|
|
|
| 57 |
|
| 58 |
const fetchResponse = await fetch(
|
| 59 |
`https://api.dev.landing.ai/v1/agent/chat?agent_class=vision_agent&visualize_output=true&self_reflection=${enableSelfReflection}`,
|
| 60 |
+
// `http://localhost:5001/v1/agent/chat?agent_class=vision_agent&self_reflection=${enableSelfReflection}`,
|
| 61 |
{
|
| 62 |
method: 'POST',
|
| 63 |
headers: {
|
|
|
|
| 114 |
const stream = fetchResponse.body.pipeThrough(
|
| 115 |
new TransformStream({
|
| 116 |
transform: async (chunk, controller) => {
|
| 117 |
+
const data = decoder.decode(chunk);
|
| 118 |
+
data.split('\n').forEach(line => {
|
| 119 |
+
if (!line.trim()) {
|
| 120 |
+
return;
|
| 121 |
+
}
|
| 122 |
+
try {
|
| 123 |
+
const json = JSON.parse(line);
|
| 124 |
+
let message = (json.log ?? '') + '\n';
|
| 125 |
+
if (json.task || json.plan || json.reflection) {
|
| 126 |
+
const arr = json.plan
|
| 127 |
+
? json.plan
|
| 128 |
+
: json.task
|
| 129 |
+
? [json.task]
|
| 130 |
+
: [json.reflection];
|
| 131 |
+
const keys = Object.keys(arr[0]);
|
| 132 |
+
message += '\n';
|
| 133 |
+
message += '| ' + keys.join(' | ') + ' |' + '\n';
|
| 134 |
+
message +=
|
| 135 |
+
new Array(keys.length + 1).fill('|').join(' :- ') + '\n';
|
| 136 |
+
arr.forEach((obj: any) => {
|
| 137 |
+
message +=
|
| 138 |
+
'| ' +
|
| 139 |
+
keys.map(key => obj[key]).join(' | ') +
|
| 140 |
+
' |' +
|
| 141 |
+
'\n';
|
| 142 |
+
});
|
| 143 |
+
message += '\n';
|
| 144 |
+
}
|
| 145 |
+
if (json.tools) {
|
| 146 |
+
message += '\n';
|
| 147 |
+
message += '| ' + 'Descriptions' + ' |' + '\n';
|
| 148 |
+
message += '| ' + ':-' + ' |' + '\n';
|
| 149 |
+
json.tools.forEach((tool: string) => {
|
| 150 |
+
message += '| ' + tool + ' |' + '\n';
|
| 151 |
+
});
|
| 152 |
+
message += '\n';
|
| 153 |
+
}
|
| 154 |
+
if (json.code) {
|
| 155 |
+
message += `\`\`\`python\n${json.code}\n\`\`\`\n`;
|
| 156 |
+
}
|
| 157 |
+
if (json.result) {
|
| 158 |
+
message += `\`\`\`\n${json.result}\n\`\`\`\n`;
|
| 159 |
+
}
|
| 160 |
+
|
| 161 |
+
logger.info(
|
| 162 |
+
session,
|
| 163 |
+
{
|
| 164 |
+
message,
|
| 165 |
+
},
|
| 166 |
+
request,
|
| 167 |
+
'__AGENT_RESPONSE',
|
| 168 |
+
);
|
| 169 |
+
controller.enqueue(encoder.encode(message));
|
| 170 |
+
} catch (e) {
|
| 171 |
+
console.log(data);
|
| 172 |
+
logger.error(
|
| 173 |
+
session,
|
| 174 |
+
{ message: (e as Error).message, data },
|
| 175 |
+
request,
|
| 176 |
+
);
|
| 177 |
+
controller.error(e);
|
| 178 |
+
controller.terminate();
|
| 179 |
+
}
|
| 180 |
+
});
|
| 181 |
},
|
| 182 |
}),
|
| 183 |
);
|
components/chat/ChatMessage.tsx
CHANGED
|
@@ -48,119 +48,95 @@ export function ChatMessage({
|
|
| 48 |
{message.role === 'user' ? <IconUser /> : <IconOpenAI />}
|
| 49 |
</div>
|
| 50 |
<div className="flex-1 px-1 ml-4 space-y-2 overflow-hidden">
|
| 51 |
-
{logs &&
|
| 52 |
-
<
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
return (
|
| 60 |
-
<p className="
|
| 61 |
{children}
|
| 62 |
</p>
|
| 63 |
);
|
| 64 |
-
}
|
| 65 |
-
code({ children, className, ...props }) {
|
| 66 |
-
const match = /language-(\w+)/.exec(className || '');
|
| 67 |
-
return (
|
| 68 |
-
<CodeBlock
|
| 69 |
-
key={Math.random()}
|
| 70 |
-
language={(match && match[1]) || ''}
|
| 71 |
-
value={String(children).replace(/\n$/, '')}
|
| 72 |
-
{...props}
|
| 73 |
-
/>
|
| 74 |
-
);
|
| 75 |
-
},
|
| 76 |
-
}}
|
| 77 |
-
>
|
| 78 |
-
{logs}
|
| 79 |
-
</MemoizedReactMarkdown>
|
| 80 |
-
</div>
|
| 81 |
-
)}
|
| 82 |
-
<MemoizedReactMarkdown
|
| 83 |
-
className="break-words"
|
| 84 |
-
remarkPlugins={[remarkGfm, remarkMath]}
|
| 85 |
-
components={{
|
| 86 |
-
p({ children, ...props }) {
|
| 87 |
-
if (
|
| 88 |
-
props.node.children.some(
|
| 89 |
-
child => child.type === 'element' && child.tagName === 'img',
|
| 90 |
-
)
|
| 91 |
-
) {
|
| 92 |
-
return (
|
| 93 |
-
<p className="flex flex-wrap gap-2 items-start">{children}</p>
|
| 94 |
-
);
|
| 95 |
-
}
|
| 96 |
-
return (
|
| 97 |
-
<p className="my-2 last:mb-0 whitespace-pre-line">{children}</p>
|
| 98 |
-
);
|
| 99 |
-
},
|
| 100 |
-
img(props) {
|
| 101 |
-
if (props.src?.endsWith('.mp4')) {
|
| 102 |
return (
|
| 103 |
-
<
|
|
|
|
|
|
|
| 104 |
);
|
| 105 |
-
}
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
<TooltipTrigger asChild>
|
| 109 |
-
<Img
|
| 110 |
-
src={props.src ?? '/landing.png'}
|
| 111 |
-
alt={props.alt ?? 'answer-image'}
|
| 112 |
-
quality={100}
|
| 113 |
-
className="cursor-zoom-in"
|
| 114 |
-
sizes="(min-width: 66em) 25vw,
|
| 115 |
-
(min-width: 44em) 40vw,
|
| 116 |
-
100vw"
|
| 117 |
-
/>
|
| 118 |
-
</TooltipTrigger>
|
| 119 |
-
<TooltipContent>
|
| 120 |
-
<Img
|
| 121 |
-
className="m-2"
|
| 122 |
-
src={props.src ?? '/landing.png'}
|
| 123 |
-
alt={props.alt ?? 'answer-image'}
|
| 124 |
-
quality={100}
|
| 125 |
-
width={500}
|
| 126 |
-
/>
|
| 127 |
-
</TooltipContent>
|
| 128 |
-
</Tooltip>
|
| 129 |
-
);
|
| 130 |
-
},
|
| 131 |
-
code({ node, inline, className, children, ...props }) {
|
| 132 |
-
if (children.length) {
|
| 133 |
-
if (children[0] == '▍') {
|
| 134 |
return (
|
| 135 |
-
<
|
| 136 |
);
|
| 137 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 138 |
|
| 139 |
-
children[0] = (children[0] as string).replace('`▍`', '▍');
|
| 140 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 141 |
|
| 142 |
-
const match = /language-(\w+)/.exec(className || '');
|
| 143 |
-
if (inline) {
|
| 144 |
return (
|
| 145 |
-
<
|
| 146 |
-
{
|
| 147 |
-
|
|
|
|
|
|
|
|
|
|
| 148 |
);
|
| 149 |
-
}
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
value={String(children).replace(/\n$/, '')}
|
| 156 |
-
{...props}
|
| 157 |
-
/>
|
| 158 |
-
);
|
| 159 |
-
},
|
| 160 |
-
}}
|
| 161 |
-
>
|
| 162 |
-
{content}
|
| 163 |
-
</MemoizedReactMarkdown>
|
| 164 |
{/* <ChatMessageActions message={message} /> */}
|
| 165 |
{isLoading && <Loading />}
|
| 166 |
</div>
|
|
|
|
| 48 |
{message.role === 'user' ? <IconUser /> : <IconOpenAI />}
|
| 49 |
</div>
|
| 50 |
<div className="flex-1 px-1 ml-4 space-y-2 overflow-hidden">
|
| 51 |
+
{logs && (
|
| 52 |
+
<MemoizedReactMarkdown
|
| 53 |
+
className="break-words"
|
| 54 |
+
remarkPlugins={[remarkGfm, remarkMath]}
|
| 55 |
+
components={{
|
| 56 |
+
p({ children, ...props }) {
|
| 57 |
+
if (
|
| 58 |
+
props.node.children.some(
|
| 59 |
+
child =>
|
| 60 |
+
child.type === 'element' && child.tagName === 'img',
|
| 61 |
+
)
|
| 62 |
+
) {
|
| 63 |
return (
|
| 64 |
+
<p className="flex flex-wrap gap-2 items-start">
|
| 65 |
{children}
|
| 66 |
</p>
|
| 67 |
);
|
| 68 |
+
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
return (
|
| 70 |
+
<p className="my-2 last:mb-0 whitespace-pre-line">
|
| 71 |
+
{children}
|
| 72 |
+
</p>
|
| 73 |
);
|
| 74 |
+
},
|
| 75 |
+
img(props) {
|
| 76 |
+
if (props.src?.endsWith('.mp4')) {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
return (
|
| 78 |
+
<video src={props.src} controls width={500} height={500} />
|
| 79 |
);
|
| 80 |
}
|
| 81 |
+
return (
|
| 82 |
+
<Tooltip>
|
| 83 |
+
<TooltipTrigger asChild>
|
| 84 |
+
<Img
|
| 85 |
+
src={props.src ?? '/landing.png'}
|
| 86 |
+
alt={props.alt ?? 'answer-image'}
|
| 87 |
+
quality={100}
|
| 88 |
+
className="cursor-zoom-in"
|
| 89 |
+
sizes="(min-width: 66em) 25vw,
|
| 90 |
+
(min-width: 44em) 40vw,
|
| 91 |
+
100vw"
|
| 92 |
+
/>
|
| 93 |
+
</TooltipTrigger>
|
| 94 |
+
<TooltipContent>
|
| 95 |
+
<Img
|
| 96 |
+
className="m-2"
|
| 97 |
+
src={props.src ?? '/landing.png'}
|
| 98 |
+
alt={props.alt ?? 'answer-image'}
|
| 99 |
+
quality={100}
|
| 100 |
+
width={500}
|
| 101 |
+
/>
|
| 102 |
+
</TooltipContent>
|
| 103 |
+
</Tooltip>
|
| 104 |
+
);
|
| 105 |
+
},
|
| 106 |
+
code({ node, inline, className, children, ...props }) {
|
| 107 |
+
// if (children.length) {
|
| 108 |
+
// if (children[0] == '▍') {
|
| 109 |
+
// return (
|
| 110 |
+
// <span className="mt-1 cursor-default animate-pulse">▍</span>
|
| 111 |
+
// );
|
| 112 |
+
// }
|
| 113 |
|
| 114 |
+
// children[0] = (children[0] as string).replace('`▍`', '▍');
|
| 115 |
+
// }
|
| 116 |
+
|
| 117 |
+
const match = /language-(\w+)/.exec(className || '');
|
| 118 |
+
// if (inline) {
|
| 119 |
+
// return (
|
| 120 |
+
// <code className={className} {...props}>
|
| 121 |
+
// {children}
|
| 122 |
+
// </code>
|
| 123 |
+
// );
|
| 124 |
+
// }
|
| 125 |
|
|
|
|
|
|
|
| 126 |
return (
|
| 127 |
+
<CodeBlock
|
| 128 |
+
key={Math.random()}
|
| 129 |
+
language={(match && match[1]) || ''}
|
| 130 |
+
value={String(children).replace(/\n$/, '')}
|
| 131 |
+
{...props}
|
| 132 |
+
/>
|
| 133 |
);
|
| 134 |
+
},
|
| 135 |
+
}}
|
| 136 |
+
>
|
| 137 |
+
{logs}
|
| 138 |
+
</MemoizedReactMarkdown>
|
| 139 |
+
)}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 140 |
{/* <ChatMessageActions message={message} /> */}
|
| 141 |
{isLoading && <Loading />}
|
| 142 |
</div>
|
lib/messageUtils.ts
CHANGED
|
@@ -42,11 +42,6 @@ export const getCleanedUpMessages = ({
|
|
| 42 |
content,
|
| 43 |
role,
|
| 44 |
}: Pick<MessageBase, 'role' | 'content'>) => {
|
| 45 |
-
if (role === 'user') {
|
| 46 |
-
return {
|
| 47 |
-
content,
|
| 48 |
-
};
|
| 49 |
-
}
|
| 50 |
if (content.split(CLEANED_SEPARATOR).length === 2) {
|
| 51 |
return {
|
| 52 |
logs: content.split(CLEANED_SEPARATOR)[0],
|
|
@@ -54,38 +49,39 @@ export const getCleanedUpMessages = ({
|
|
| 54 |
};
|
| 55 |
}
|
| 56 |
const [logs = '', answer = ''] = content.split('<ANSWER>');
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
let
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
}
|
| 88 |
-
|
|
|
|
| 89 |
const [answerText, imagesStr = ''] = answer.split('<VIZ>');
|
| 90 |
const [imagesArrayStr, ...rest] = imagesStr.split('</VIZ>');
|
| 91 |
const images = imagesArrayStr
|
|
@@ -93,7 +89,7 @@ export const getCleanedUpMessages = ({
|
|
| 93 |
.map(str => str.replace('<IMG>', ''))
|
| 94 |
.slice(0, -1);
|
| 95 |
return {
|
| 96 |
-
logs:
|
| 97 |
content:
|
| 98 |
answerText.replace('</</ANSWER>', '').replace('</ANSWER>', '') +
|
| 99 |
'\n\n' +
|
|
|
|
| 42 |
content,
|
| 43 |
role,
|
| 44 |
}: Pick<MessageBase, 'role' | 'content'>) => {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
if (content.split(CLEANED_SEPARATOR).length === 2) {
|
| 46 |
return {
|
| 47 |
logs: content.split(CLEANED_SEPARATOR)[0],
|
|
|
|
| 49 |
};
|
| 50 |
}
|
| 51 |
const [logs = '', answer = ''] = content.split('<ANSWER>');
|
| 52 |
+
// console.log(logs);
|
| 53 |
+
// const cleanedLogs = [];
|
| 54 |
+
// let left = 0;
|
| 55 |
+
// let right = 0;
|
| 56 |
+
// while (right < logs.length) {
|
| 57 |
+
// if (Object.keys(PAIRS).includes(content[right])) {
|
| 58 |
+
// cleanedLogs.push(content.substring(left, right));
|
| 59 |
+
// left = right++;
|
| 60 |
+
// while (
|
| 61 |
+
// right < content.length &&
|
| 62 |
+
// PAIRS[content[left]] !== content[right]
|
| 63 |
+
// ) {
|
| 64 |
+
// right++;
|
| 65 |
+
// }
|
| 66 |
+
// if (content[left] === MIDDLE_STARTER) {
|
| 67 |
+
// // add the text alignment so it can be shown as a table
|
| 68 |
+
// const separators = logs
|
| 69 |
+
// .substring(left, right)
|
| 70 |
+
// .split(MIDDLE_SEPARATOR).length;
|
| 71 |
+
// if (separators > 0) {
|
| 72 |
+
// cleanedLogs.push(
|
| 73 |
+
// Array(separators + 1)
|
| 74 |
+
// .fill('|')
|
| 75 |
+
// .join(' :- '),
|
| 76 |
+
// );
|
| 77 |
+
// }
|
| 78 |
+
// }
|
| 79 |
+
// left = ++right;
|
| 80 |
+
// } else {
|
| 81 |
+
// right++;
|
| 82 |
+
// }
|
| 83 |
+
// }
|
| 84 |
+
// cleanedLogs.push(content.substring(left, right));
|
| 85 |
const [answerText, imagesStr = ''] = answer.split('<VIZ>');
|
| 86 |
const [imagesArrayStr, ...rest] = imagesStr.split('</VIZ>');
|
| 87 |
const images = imagesArrayStr
|
|
|
|
| 89 |
.map(str => str.replace('<IMG>', ''))
|
| 90 |
.slice(0, -1);
|
| 91 |
return {
|
| 92 |
+
logs: logs,
|
| 93 |
content:
|
| 94 |
answerText.replace('</</ANSWER>', '').replace('</ANSWER>', '') +
|
| 95 |
'\n\n' +
|