import { GoogleGenAI, GenerateContentResponse } from "@google/genai";
import { Provider, AppType } from '../types';
type ImagePart = {
mimeType: string;
data: string;
};
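// Illustrative sketch (hypothetical helper, not referenced elsewhere in this
// module): one way a caller might build an ImagePart from a base64 data URL,
// e.g. the string produced by FileReader.readAsDataURL in the browser.
function dataUrlToImagePart(dataUrl: string): { mimeType: string; data: string } {
const match = dataUrl.match(/^data:([^;,]+);base64,(.*)$/);
if (!match) throw new Error('Expected a base64-encoded data URL');
return { mimeType: match[1], data: match[2] };
}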
const ANALYSIS_PROMPT_PYTHON = `
You are an expert UI/UX analyst. Your task is to analyze the provided UI images and user instructions, then generate a detailed textual specification for a developer.
This specification will be used by another AI to write Python PyQt6 code.
**Analysis Steps:**
1. **Examine the UI Images:** Identify all UI components (e.g., buttons, input fields, labels, sliders, web views, menus). Note their positions, sizes, colors, and any text they contain.
2. **Infer Layout and Structure:** Describe the overall layout of the application (e.g., grid layout, vertical box layout, main window with a status bar). Use PyQt6 layout managers in your description.
3. **Determine Functionality:** Based on the components and their context, infer the application's purpose and the function of each element.
4. **Incorporate User Instructions:** You MUST integrate any provided user instructions into your specification. These instructions override any inferences from the images. For example, if an image shows a blue button but the user asks to 'make the button green', your specification must describe a green button.
**Output Format:**
Provide a clear, detailed, and well-structured description of the application. Do NOT write any Python code. The output should be a blueprint that a PyQt6 developer can follow precisely.
`;
const PYQT_CODEGEN_PROMPT = `
You are an expert Python developer specializing in PyQt6. Your task is to create a fully functional desktop application based on a detailed specification.
**Instructions:**
1. **Read the Specification:** Carefully read the entire application specification provided below.
2. **Generate Code:** Write a single, complete, and executable Python script using the PyQt6 library that implements the specification precisely.
3. **Implement All Logic:** The generated code must not only replicate the visual layout but also implement all the described functionality.
- For web browsers, use PyQt6.QtWebEngineWidgets.
- For calculators, ensure all buttons are connected to functions that perform the correct calculations.
- For text editors, implement text editing and file operations.
4. **Code Requirements:**
- The script must be self-contained and runnable.
- Include all necessary imports.
- Define a main window class that subclasses \`QMainWindow\`.
- Connect signals to slots to implement functionality.
- Include the standard boilerplate to instantiate and run the \`QApplication\`.
5. **Output Format:** Provide ONLY the raw Python code. Do not include any explanations, comments about the code, or markdown fences like \`\`\`python.
--- APPLICATION SPECIFICATION ---
`;
const ANALYSIS_PROMPT_CPP = `
You are an expert UI/UX analyst. Your task is to analyze the provided UI images and user instructions, then generate a detailed textual specification for a developer.
This specification will be used by another AI to write a C++ Qt application.
**Analysis Steps:**
1. **Examine the UI Images:** Identify all UI components (e.g., QPushButton, QLineEdit, QLabel, QSlider, QWebEngineView, QMenu). Note their positions, sizes, colors, and any text they contain.
2. **Infer Layout and Structure:** Describe the overall layout of the application (e.g., QGridLayout, QVBoxLayout, QMainWindow with a QStatusBar). Use C++ Qt layout managers in your description.
3. **Determine Functionality:** Based on the components and their context, infer the application's purpose and the function of each element.
4. **Incorporate User Instructions:** You MUST integrate any provided user instructions into your specification. These instructions override any inferences from the images. For example, if an image shows a blue button but the user asks to 'make the button green', your specification must describe a green button.
**Output Format:**
Provide a clear, detailed, and well-structured description of the application. Do NOT write any C++ code. The output should be a blueprint that a C++/Qt developer can follow precisely.
`;
const CPP_QT_CODEGEN_PROMPT = `
You are an expert C++ developer specializing in the Qt 6 framework. Your task is to create a fully functional, multi-file desktop application based on a detailed specification.
**Instructions:**
1. **Read the Specification:** Carefully read the entire application specification provided below.
2. **Generate Code Structure:** Create a complete, compilable, and executable C++/Qt application with the following file structure:
- \`main.cpp\`: The main entry point for the application. It should instantiate and show the main window.
- \`mainwindow.h\`: The header file for your main window class (e.g., \`MainWindow\`), which should inherit from \`QMainWindow\`. It should declare all UI elements, layouts, and slots.
- \`mainwindow.cpp\`: The implementation file for your main window class. It should define the constructor (where the UI is built), and implement all slots (functionality).
3. **Implement All Logic:** The generated code must not only replicate the visual layout but also implement all the described functionality.
- For web browsers, use QWebEngineView from the QtWebEngineWidgets module.
- For calculators, ensure all buttons are connected to slots that perform the correct calculations.
4. **Code Requirements:**
- Use C++17 or later.
- Include header guards in \`.h\` files.
- Include all necessary Qt headers.
- Connect signals to slots using the modern \`QObject::connect\` syntax.
- The code must be clean, well-organized, and ready to be compiled with a standard build system (CMake, qmake, etc.).
5. **Output Format:**
- You MUST provide the output as a single, valid JSON object.
- The keys of the JSON object must be the filenames (e.g., "main.cpp", "mainwindow.h", "mainwindow.cpp").
- The values must be strings containing the complete, raw source code for the corresponding file.
- Do not include any explanations, comments, or markdown fences like \`\`\`json.
**Example JSON Output:**
{
"main.cpp": "#include \\"mainwindow.h\\"\\n#include <QApplication>\\n\\nint main(int argc, char *argv[])\\n{\\n QApplication a(argc, argv);\\n MainWindow w;\\n w.show();\\n return a.exec();\\n}",
"mainwindow.h": "#ifndef MAINWINDOW_H\\n#define MAINWINDOW_H\\n\\n#include <QMainWindow>\\n\\nclass MainWindow : public QMainWindow\\n{\\n Q_OBJECT\\n\\npublic:\\n MainWindow(QWidget *parent = nullptr);\\n ~MainWindow();\\n};\\n#endif // MAINWINDOW_H",
"mainwindow.cpp": "#include \\"mainwindow.h\\"\\n\\nMainWindow::MainWindow(QWidget *parent)\\n : QMainWindow(parent)\\n{\\n // UI setup code here\\n}\\n\\nMainWindow::~MainWindow()\\n{\\n}"
}
--- APPLICATION SPECIFICATION ---
`;
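// Illustrative sketch (hypothetical helper, not wired into this module): the
// C++ codegen prompt above asks for a raw JSON object mapping filenames to
// source code, but models sometimes wrap it in markdown fences anyway. A
// tolerant parser for that output might look like this:
function parseGeneratedFiles(raw: string): Record<string, string> {
const stripped = raw.trim().replace(/^```(json)?\s*/, '').replace(/\s*```$/, '');
const files = JSON.parse(stripped);
if (typeof files !== 'object' || files === null || Array.isArray(files)) {
throw new Error('Expected a JSON object mapping filenames to source code');
}
return files as Record<string, string>;
}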
// --- Non-streaming API callers for analysis step ---
async function callGemini(apiKey: string, prompt: string, imageParts: ImagePart[]): Promise<string> {
const ai = new GoogleGenAI({ apiKey });
const parts = [{ text: prompt }, ...imageParts.map(img => ({ inlineData: { mimeType: img.mimeType, data: img.data } }))];
const response: GenerateContentResponse = await ai.models.generateContent({ model: 'gemini-2.5-flash-preview-04-17', contents: [{ parts }] });
return response.text ?? '';
}
async function callOpenAI(apiKey: string, prompt: string, imageParts: ImagePart[]): Promise<string> {
const content = [{ type: 'text', text: prompt }, ...imageParts.map(img => ({ type: 'image_url', image_url: { url: `data:${img.mimeType};base64,${img.data}` } }))];
const body = { model: 'gpt-4o', messages: [{ role: 'user', content }], max_tokens: 4096 };
const response = await fetch('https://api.openai.com/v1/chat/completions', { method: 'POST', headers: { 'Authorization': `Bearer ${apiKey}`, 'Content-Type': 'application/json' }, body: JSON.stringify(body) });
if (!response.ok) { const err = await response.json().catch(() => null); throw new Error(`OpenAI API Error: ${err?.error?.message || response.statusText}`); }
const data = await response.json();
return data.choices?.[0]?.message?.content ?? '';
}
async function callAnthropic(apiKey: string, prompt: string, imageParts: ImagePart[]): Promise<string> {
const content = [{ type: 'text', text: prompt }, ...imageParts.map(img => ({ type: 'image', source: { type: 'base64', media_type: img.mimeType, data: img.data } }))];
const body = { model: 'claude-3-sonnet-20240229', messages: [{ role: 'user', content }], max_tokens: 4096 };
const response = await fetch('https://api.anthropic.com/v1/messages', { method: 'POST', headers: { 'x-api-key': apiKey, 'anthropic-version': '2023-06-01', 'Content-Type': 'application/json' }, body: JSON.stringify(body) });
if (!response.ok) { const err = await response.json().catch(() => null); throw new Error(`Anthropic API Error: ${err?.error?.message || response.statusText}`); }
const data = await response.json();
return data.content?.[0]?.text ?? '';
}
// --- Unified Streaming API caller ---
async function callApiStream(
provider: Provider,
apiKey: string,
prompt: string,
onChunk: (chunk: string) => void
): Promise<string> {
const model = provider === 'gemini' ? 'gemini-2.5-flash-preview-04-17' : (provider === 'openai' ? 'gpt-4o' : 'claude-3-sonnet-20240229');
let fullResponse = '';
const processChunk = (chunk: string) => {
fullResponse += chunk;
onChunk(chunk);
};
if (provider === 'gemini') {
const ai = new GoogleGenAI({ apiKey });
const response = await ai.models.generateContentStream({ model, contents: prompt });
for await (const chunk of response) {
if (chunk.text) processChunk(chunk.text);
}
} else if (provider === 'openai') {
const body = { model, messages: [{ role: 'user', content: prompt }], max_tokens: 4096, stream: true };
const response = await fetch('https://api.openai.com/v1/chat/completions', { method: 'POST', headers: { 'Authorization': `Bearer ${apiKey}`, 'Content-Type': 'application/json' }, body: JSON.stringify(body) });
if (!response.ok || !response.body) { const err = await response.json().catch(() => null); throw new Error(`OpenAI API Error: ${err?.error?.message || response.statusText}`); }
const reader = response.body.getReader();
const decoder = new TextDecoder();
let buffer = '';
streamLoop: while (true) {
const { done, value } = await reader.read();
if (done) break;
buffer += decoder.decode(value, { stream: true });
const lines = buffer.split('\n');
buffer = lines.pop() || ''; // Keep the last, possibly incomplete, line in the buffer
for (const line of lines) {
if (!line.startsWith('data: ')) continue;
const message = line.substring(6);
if (message === '[DONE]') break streamLoop;
try {
const json = JSON.parse(message);
const textChunk = json.choices?.[0]?.delta?.content;
if (textChunk) processChunk(textChunk);
} catch (e) { /* Ignore malformed SSE payloads */ }
}
}
} else if (provider === 'anthropic') {
const body = { model, messages: [{ role: 'user', content: prompt }], max_tokens: 4096, stream: true };
const response = await fetch('https://api.anthropic.com/v1/messages', { method: 'POST', headers: { 'x-api-key': apiKey, 'anthropic-version': '2023-06-01', 'Content-Type': 'application/json' }, body: JSON.stringify(body) });
if (!response.ok || !response.body) { const err = await response.json().catch(() => null); throw new Error(`Anthropic API Error: ${err?.error?.message || response.statusText}`); }
const reader = response.body.getReader();
const decoder = new TextDecoder();
let buffer = '';
while (true) {
const { done, value } = await reader.read();
if (done) break;
buffer += decoder.decode(value, { stream: true });
const events = buffer.split('\n\n');
buffer = events.pop() || ''; // Keep the last, possibly incomplete, event in buffer
for (const event of events) {
if (!event.startsWith('event: content_block_delta')) continue;
const dataLine = event.split('\n').find(line => line.startsWith('data: '));
if (dataLine) {
try {
const jsonData = JSON.parse(dataLine.substring(6));
if (jsonData.type === 'content_block_delta' && jsonData.delta.type === 'text_delta') {
processChunk(jsonData.delta.text);
}
} catch (e) { /* Ignore incomplete JSON */ }
}
}
}
}
return fullResponse;
}
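// Illustrative sketch of the per-line SSE handling above (hypothetical helper,
// not called by callApiStream): given a single "data: ..." line from the
// OpenAI stream, return the text delta, or null for [DONE] and non-text payloads.
function extractOpenAIDelta(line: string): string | null {
if (!line.startsWith('data: ')) return null;
const message = line.substring(6);
if (message === '[DONE]') return null;
try {
const json = JSON.parse(message);
return json.choices?.[0]?.delta?.content ?? null;
} catch {
return null; // Malformed or partial JSON
}
}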
export async function generateCode(
appType: AppType,
provider: Provider,
apiKey: string,
imageParts: ImagePart[],
instructions: string,
onStatusChange: (status: string) => void,
onChunk: (chunk: string) => void
): Promise<string | void> {
try {
const isPython = appType === 'python';
const analysisPromptTemplate = isPython ? ANALYSIS_PROMPT_PYTHON : ANALYSIS_PROMPT_CPP;
const codegenPromptTemplate = isPython ? PYQT_CODEGEN_PROMPT : CPP_QT_CODEGEN_PROMPT;
onStatusChange(`Step 1/2: Analyzing UI with ${provider}...`);
const analysisPrompt = `${analysisPromptTemplate}${instructions ? `\n\n--- USER REFINEMENT INSTRUCTIONS ---\n${instructions.trim()}` : ''}`;
const callAnalysisApi = {
'gemini': callGemini,
'openai': callOpenAI,
'anthropic': callAnthropic,
}[provider];
const specification = await callAnalysisApi(apiKey, analysisPrompt, imageParts);
if (!specification || specification.trim() === '') {
throw new Error('AI failed to generate a UI specification. The response was empty.');
}
const lang = isPython ? 'Python' : 'C++';
onStatusChange(`Step 2/2: Generating ${lang} code with ${provider}...`);
const codegenPrompt = `${codegenPromptTemplate}\n${specification}`;
const finalResult = await callApiStream(provider, apiKey, codegenPrompt, (chunk) => {
if (isPython) {
// Some models prepend a markdown fence despite the prompt; strip it when a chunk starts with one.
const cleanedChunk = chunk.replace(/^```(python)?\n/, '');
onChunk(cleanedChunk);
} else {
// For C++, we just stream raw chunks. The final string will be parsed.
onChunk(chunk);
}
});
if (isPython) {
// Emit a closing markdown fence so the consumer's streamed code block is terminated.
onChunk('\n```');
return;
} else {
// For C++, return the complete JSON string for parsing in the component.
return finalResult;
}
} catch (error) {
console.error(`Error during code generation with ${provider}:`, error);
if (error instanceof Error) {
throw new Error(`Failed to communicate with the ${provider} API. ${error.message}`);
}
throw new Error("An unknown error occurred while generating code.");
}
}
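// Example usage (illustrative only; assumes a caller that has already produced
// base64 image parts, a valid API key, and UI callbacks for status and chunks):
//
//   const result = await generateCode(
//     'python',            // AppType
//     'gemini',            // Provider
//     apiKey,
//     [{ mimeType: 'image/png', data: base64Png }],
//     'Make the main button green',
//     (status) => setStatus(status),
//     (chunk) => appendToEditor(chunk),
//   );
//   // For the 'cpp' AppType, the resolved value is the raw JSON string of files.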