# test_space / app.py
# shuhayas's picture
# Update app.py
# 3a6c726 verified
import streamlit as st
import requests
from PIL import Image
import base64
# --- Streamlit app layout -------------------------------------------------
st.title('AIベースの画像分析と画像生成Webアプリケーション')

# OpenAI API key, typed by the user (masked input). Re-read on every rerun.
OPENAI_API_KEY = st.text_input("OpenAI APIキーを入力してください", type="password")

# Common headers for all OpenAI REST calls; rebuilt on each Streamlit rerun
# so they always carry the latest key entered above.
headers = {
    'Content-Type': 'application/json',
    'Authorization': f'Bearer {OPENAI_API_KEY}'
}

# OpenAI API endpoints used below.
GPT4_VISION_API = 'https://api.openai.com/v1/chat/completions'
DALL_E_API = 'https://api.openai.com/v1/images/generations'

# Image uploader widget (None until the user uploads a file).
uploaded_image = st.file_uploader("画像をアップロードしてください", type=['png', 'jpg', 'jpeg'])

# Preview the uploaded image. The two preview lines had lost their
# indentation in the original paste; they belong inside this `if`.
# NOTE(review): `use_column_width` is deprecated in newer Streamlit in
# favor of `use_container_width` — confirm the target Streamlit version.
if uploaded_image is not None:
    image = Image.open(uploaded_image)
    st.image(image, caption='アップロードされた画像', use_column_width=True)
# Default system prompt for the Vision request. The original literal was
# shell-escape mangled (every apostrophe appeared as '\'' and the string was
# split across physical lines), which made the file a SyntaxError. It is
# restored here as a triple-quoted literal with the intended content; the
# embedded \n escapes are kept as escapes, exactly as in the original.
DEFAULT_SYSTEM_MESSAGE = """この画像を分析して、内容について詳細な説明をしてください。As an expert in creating Midjourney image prompts, you focus on both prompt construction and parameter selection. You guide users in choosing the right parameters for their prompts, such as Aspect Ratios, Light, Chaos, Quality, Seed, Style, and others, explaining how each can impact the final image. You help users integrate these parameters effectively, ensuring they understand how to adjust them for desired outcomes. You provide insights into the compatibility of different parameters with various Midjourney model versions and advise on the best choices for specific image styles or themes. You also assist with legacy parameters, aiding users familiar with earlier models.\\n\\nKey parameters include Aspect Ratios and the --repeat or --r parameter. Aspect Ratios, changed with --aspect or --ar, affect the image's shape and composition. Common ratios include 1:1, 5:4, 3:2, and 7:4. Ratios above 2:1 and below 1:2 are experimental.\\n\\nThe --repeat or --r parameter runs a Job multiple times, allowing for varied visual exploration. It accepts values 24 for Basic subscribers, 210 for Standard subscribers, and 240 for Pro and Mega subscribers. It can only be used in Fast and Turbo GPU mode. Using the redo button on --repeat results will only re-run the prompt once.\\n'''\\nA Prompt is a short text phrase that the Midjourney Bot interprets to produce an image. The Midjourney Bot breaks down the words and phrases in a prompt into smaller pieces, called tokens, that are compared to its training data and then used to generate an image. A well-crafted prompt can help make unique and exciting images.\\nBasic Prompts\\nA basic prompt can be as simple as a single word, phrase or emoji 😊.\\n\\nImage showing the Midjourney prompt structure.\\n\\n\\n\\nPrompting Tip!\\nThe Midjourney Bot works best with simple, short sentences that describe what you want to see. 
Avoid long lists of requests and instructions. Instead of: Show me a picture of lots of blooming California poppies, make them bright, vibrant orange, and draw them in an illustrated style with colored pencils Try: Bright orange California poppies drawn with colored pencils\\n\\n\\n\\nAdvanced Prompts\\nMore advanced prompts can include one or more image URLs, multiple text phrases, and one or more parameters\\n\\nImage showing the Midjourney prompt structure.\\n\\nImage Prompts\\nImage URLs can be added to a prompt to influence the style and content of the finished result. Image URLs always go at the front of a prompt.\\n\\nRead more about Image Prompts\\n\\nText Prompt\\nThe text description of what image you want to generate. See below for prompting information and tips. Well-written prompts help generate amazing images.\\n\\nParameters\\nParameters change how an image generates. Parameters can change aspect ratios, models, upscalers, and lots more. Parameters go at the end of the prompt.\\n\\nRead more about Parameters\\n\\n\\n\\nPrompting Notes\\nWord Choice\\nWord choice matters. More specific synonyms work better in many circumstances. Instead of big, try tiny, huge, gigantic, enormous, or immense.\\n\\nPlural words and Collective Nouns\\nPlural words leave a lot to chance. Try specific numbers. \\"Three cats\\" is more specific than \\"cats.\\" Collective nouns also work, “flock of birds” instead of \\"birds.”\\n\\nFocus on What you Want\\nIt is better to describe what you want instead of what you don’t want. If you ask for a party with “no cake,” your image will probably include a cake. To ensure an object is not in the final image, try advanced prompting using thee --no parameter.\\n\\nPrompt Length and Details\\nPrompts can be simple. A single word or emoji will work. However, short prompts rely on Midjourney’s default style, allowing it to fill in any unspecified details creatively. Include any element that is important to you in your prompt. 
Fewer details means more variety but less control.\\n\\nTry to be clear about any context or details that are important to you. Think about:\\n\\nSubject: person, animal, character, location, object\\nMedium: photo, painting, illustration, sculpture, doodle, tapestry\\nEnvironment: indoors, outdoors, on the moon, underwater, in the city\\nLighting: soft, ambient, overcast, neon, studio lights\\nColor: vibrant, muted, bright, monochromatic, colorful, black and white, pastel\\nMood: sedate, calm, raucous, energetic\\nComposition: portrait, headshot, closeup, birds-eye view\\n'''\\noutput'''\\n[Prompt] --<param> --<param>\\n\\nEx.\\n'''\\n<code block>\\n/imagine prompt: An aerial view of an Edo period Japanese streetscape, Miyamoto Musashi and Saigo Takamori locked in combat in the foreground, while futuristic hovercars zoom past above. Mixture of traditional and sci-fi elements. Created Using: digital manga art, bird's-eye perspective, detailed architectural rendering, motion blur on cars, X aerial samurai fight, hd quality, vivid style --ar 3:2 --v 6.0\\n'''\\n/imagine prompt: [prompt] --<param>"""

# Inputs received from the Streamlit web interface (translated from the
# original Japanese comment).
system_message = st.text_input('システムメッセージを入力してください', DEFAULT_SYSTEM_MESSAGE)
assistant_message = st.text_input('アシスタントメッセージを入力してください', 'promptを書き出す')
# --- Image analysis button ------------------------------------------------
if st.button('画像を分析'):
    if uploaded_image is None:
        # Guard added: the original called uploaded_image.getvalue() even
        # when nothing was uploaded, raising AttributeError on None.
        st.warning('先に画像をアップロードしてください。')
    else:
        # Base64-encode the uploaded bytes to build the data-URL expected
        # by the GPT-4 Vision chat-completions endpoint.
        img_bytes = uploaded_image.getvalue()
        img_b64 = base64.b64encode(img_bytes).decode('utf-8')
        # Request payload for the image-analysis call (structure translated
        # from the original Japanese comment: "fixed data structure for the
        # image-analysis request").
        vision_data = {
            'model': 'gpt-4-vision-preview',
            'messages': [
                {
                    'role': 'system',
                    'content': system_message  # user-editable system prompt
                },
                {
                    'role': 'user',
                    'content': [
                        {
                            'type': 'image_url',
                            'image_url': {
                                # NOTE(review): always labelled image/jpeg even
                                # for PNG uploads — APIs generally accept this,
                                # but confirm.
                                'url': f'data:image/jpeg;base64,{img_b64}'
                            }
                        }
                    ]
                },
                {
                    'role': 'assistant',
                    'content': assistant_message  # user-editable assistant seed
                }
            ],
            'max_tokens': 4000
        }
        response = requests.post(GPT4_VISION_API, headers=headers, json=vision_data)
        if response.status_code == 200:
            analysis_result = response.json()
            st.text_area('画像解析結果', analysis_result['choices'][0]['message']['content'], height=150)
        else:
            # Surface the raw error payload to help the user diagnose
            # (bad key, quota, model access, ...).
            st.error('画像解析中にエラーが発生しました。')
            st.json(response.json())
# --- Image generation button ----------------------------------------------
# Widgets: prompt text, output size, and how many images to generate.
prompt = st.text_input('画像生成のためのプロンプトを入力してください')
image_size = st.selectbox('画像のサイズを選択してください', ['1024x1024', '1792x1024', '1024x1792'])
number_of_images = st.number_input('生成する画像の枚数を入力してください', min_value=1, value=1)

if st.button('画像を生成') and prompt:
    # One API call per requested image; the payload asks for n=1 each time
    # (presumably because dall-e-3 only accepts n=1 per request — confirm).
    for _ in range(number_of_images):
        generation_data = {
            'model': 'dall-e-3',
            'prompt': prompt,
            'n': 1,
            'size': image_size
        }
        response = requests.post(DALL_E_API, headers=headers, json=generation_data)
        if response.status_code != 200:
            # Guard clause: show the raw error payload and stop the loop
            # after the first failure (same behavior as the original break).
            st.error('画像生成中にエラーが発生しました。以下のレスポンスを確認してください。')
            st.json(response.json())
            break
        # Display every image URL returned for this request.
        generation_result = response.json()
        for image_data in generation_result['data']:
            generated_image_url = image_data['url']
            st.image(generated_image_url, caption='生成された画像', use_column_width=True)