Spaces:

Jiangxz01
/

AI_Visual_Storytelling

Runtime error

App Files Files

Jiangxz01 committed on Sep 18, 2024

Commit

41f5990

verified ·

1 Parent(s): f5db0ff

Upload 2 files

Browse files

AI Visual Storytelling

Files changed (2) hide show

app.py +319 -0
requirements.txt +2 -0

app.py ADDED Viewed

	@@ -0,0 +1,319 @@

+# -*- coding: utf-8 -*-
+# 財政部財政資訊中心 江信宗
+import streamlit as st
+import requests
+from PIL import Image
+import io
+import base64
+import time
+import uuid
+import json
+from gtts import gTTS
+import os
+from litellm import completion
+from dotenv import load_dotenv
+load_dotenv()
+def compress_image(image, max_size=(800, 800), quality=95):
+    img_copy = image.copy()
+    img_copy.thumbnail(max_size)
+    buffered = io.BytesIO()
+    img_copy.save(buffered, format="JPEG", quality=quality)
+    return buffered.getvalue()
+def analyze_image(image, api_key, model):
+    compressed_image = compress_image(image)
+    img_str = base64.b64encode(compressed_image).decode()
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                {"type": "text", "text": "Carefully observe this image and describe it in as much detail as possible. Please address the following aspects: primary subject matter, background setting, color palette, emotional conveyance, and specific details."},
+                {
+                    "type": "image_url",
+                    "image_url": {
+                        "url": f"data:image/jpeg;base64,{img_str}"
+                    }
+                }
+            ]
+        }
+    ]
+    response = completion(model=model, messages=messages, max_tokens=1024)
+    return response.choices[0].message.content.strip()
+def translate_to_chinese(text, api_key, model):
+    if "groq/" in model:
+        translation_model = "groq/gemma2-9b-it"
+    else:
+        translation_model = model
+    messages = [
+        {
+            "role": "system",
+            "content": "You are an expert translator proficient in both Traditional Chinese and English, with 40 years of translation experience and extensive cross-disciplinary knowledge. You have been deeply involved in the Chinese translations of The New York Times and Bloomberg, and have a deep understanding of the translation of current events and academic papers. I would like you to translate the following English text into Traditional Chinese, with a style similar to the Chinese versions of the aforementioned magazines. I would like to request a translation of the following English content into Traditional Chinese. Please ensure that the translation is accurate and natural-sounding."
+        },
+        {
+            "role": "user",
+            "content": f"THAT'S IMPORTANT OTHERWISE I'LL DIE. Translate the Text ``` {text} ``` into \"Traditional Chinese\". Must reply to me in Traditional Chinese."
+        }
+    ]
+    response = completion(model=translation_model, messages=messages, max_tokens=1024)
+    return response.choices[0].message.content.strip()
+def resize_image(image, target_height=400):
+    original_width, original_height = image.size
+    aspect_ratio = original_width / original_height
+    target_width = int(target_height * aspect_ratio)
+    resized_image = image.resize((target_width, target_height), Image.LANCZOS)
+    return resized_image
+def main():
+    st.set_page_config(
+        layout="wide",
+        page_title="AI-Powered Visual Storytelling",
+        page_icon="🖼️",
+        menu_items={
+            'Get Help': None,
+            'Report a bug': None,
+            'About': '# 圖片AI辨識應用\n使用AI分析圖片內容之網頁程式。'
+        }
+    )
+    st.markdown("""
+    <style>
+    .stApp {
+        background-image: linear-gradient(to bottom, #e6f3ff, #ffffff);
+    }
+    .stTitle, .stMarkdown, .stRadio, .stFileUploader, .stTextInput > label, p {
+        color: black !important;
+    }
+    .stTitle h1 {
+        color: black !important;
+    }
+    .stButton>button {
+        background-color: #3498db;
+        color: white;
+    }
+    .stTextInput>div>div>input {
+        background-color: #ecf0f1;
+        color: #2c3e50;
+    }
+    .custom-image-container {
+        border: 2px solid #bdc3c7;
+        border-radius: 10px;
+        overflow: hidden;
+    }
+    .custom-image {
+        width: 100%;
+        height: 400px;
+        object-fit: cover;
+        border-radius: 10px;
+    }
+    .description-box {
+        background-color: rgba(52, 152, 219, 0.1);
+        border-left: 5px solid #3498db;
+        padding: 12px;
+        border-radius: 0 6px 6px 0;
+        transition: all 0.3s ease;
+        margin-bottom: 5px;
+    }
+    .description-box:hover {
+        background-color: rgba(52, 152, 219, 0.2);
+        box-shadow: 0 0 10px rgba(52, 152, 219, 0.5);
+    }
+    .description-box p {
+        color: #2c3e50;
+        font-size: 16px;
+        line-height: 1.6;
+        transition: all 0.3s ease;
+    }
+    .description-box:hover p {
+        font-weight: bold;
+    }
+    .info-box {
+        background-color: rgba(52, 152, 219, 0.1);
+        border-left: 5px solid #3498db;
+        padding: 10px;
+        border-radius: 0 10px 10px 0;
+        transition: all 0.3s ease;
+        margin-bottom: 5px;
+    }
+    .info-box:hover {
+        background-color: rgba(52, 152, 219, 0.2);
+        box-shadow: 0 0 10px rgba(52, 152, 219, 0.5);
+    }
+    .info-box p {
+        color: #2c3e50;
+        font-size: 16px;
+        line-height: 1.6;
+        transition: all 0.3s ease;
+        margin: 0;
+    }
+    .info-box:hover p {
+        font-weight: bold;
+    }
+    .stTextInput > div > div > input {
+        background-color: #ffffff;
+        color: #2c3e50;
+        border: 2px solid #3498db;
+        border-radius: 5px;
+        padding: 8px 12px;
+    }
+    .stButton > button {
+        background-color: #3498db;
+        color: white;
+        border: none;
+        border-radius: 5px;
+        padding: 8px 16px;
+        font-weight: bold;
+        transition: all 0.3s ease;
+    }
+    .stButton > button:hover {
+        background-color: #2980b9;
+        box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
+    }
+    [data-testid=stSidebar] {
+        background-color: #f0f8ff;
+        padding: 20px;
+    }
+    [data-testid=stSidebar] .stTitle h1 {
+        color: #2c3e50 !important;
+        font-size: 24px;
+        margin-bottom: 20px;
+    }
+    .main-content {
+        padding-left: 0 !important;
+    }
+    .stColumns {
+        gap: 1rem !important;
+    }
+    .streamlit-expanderHeader {
+        background-color: #3498db;
+        color: white !important;
+        border-radius: 5px;
+        padding: 10px 15px;
+        font-weight: bold;
+        transition: all 0.3s ease;
+    }
+    .streamlit-expanderHeader:hover {
+        background-color: #2980b9;
+        box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
+    }
+    .streamlit-expanderContent {
+        border: 1px solid #3498db;
+        border-radius: 0 0 5px 5px;
+        padding: 10px;
+    }
+    </style>
+    <script>
+    const mutationObserver = new MutationObserver(function(mutations) {
+        mutations.forEach(function(mutation) {
+            if (mutation.type === 'childList') {
+                const descriptionBoxes = document.querySelectorAll('.description-box');
+                descriptionBoxes.forEach(box => {
+                    const paragraphs = box.querySelectorAll('p');
+                    paragraphs.forEach(p => {
+                        p.textContent = p.textContent.replace(/^<strong>|<\/strong>$/g, '');
+                    });
+                });
+            }
+        });
+    });
+    mutationObserver.observe(document.body, {
+        childList: true,
+        subtree: true
+    });
+    </script>
+    """, unsafe_allow_html=True)
+    with st.sidebar:
+        st.title("🖼️ 圖片分析")
+        if 'uploaded_files' not in st.session_state:
+            st.session_state.uploaded_files = []
+        new_uploads = st.file_uploader("新增/刪除圖片", type=["png", "jpg", "jpeg"], accept_multiple_files=True)
+        current_files = {f.name: f for f in new_uploads} if new_uploads else {}
+        st.session_state.uploaded_files = [f for f in st.session_state.uploaded_files if f.name in current_files]
+        for file_name, file in current_files.items():
+            if file_name not in [f.name for f in st.session_state.uploaded_files]:
+                st.session_state.uploaded_files.append(file)
+        uploaded_files = st.session_state.uploaded_files
+        with st.expander("詮釋圖片語言", expanded=False):
+            language = st.radio("", ["繁體中文", "English"], index=0)
+        st.markdown("### 🤖 Model Settings")
+        model_options = ["gpt-4o", "gemini-1.5-pro", "gpt-4o-mini", "custom"]
+        selected_model = st.selectbox("Select Model", model_options)
+        if selected_model == "custom":
+            custom_model = st.text_input("Enter custom model name")
+            model = custom_model if custom_model else "groq/llava-v1.5-7b-4096-preview"
+        else:
+            model = selected_model
+        st.markdown("### 🔑 API Settings")
+        api_key = st.text_input("API Key", type="password", value=os.getenv("OPENAI_API_KEY", ""))
+        api_base = st.text_input("API Base URL", value=os.getenv("OPENAI_API_BASE", "")) or "https://api.groq.com/openai/v1/"
+        if st.button("Save API Settings"):
+            os.environ["OPENAI_API_KEY"] = api_key
+            os.environ["OPENAI_API_BASE"] = api_base
+            st.success("API settings saved successfully")
+        st.markdown("""
+        <div class="info-box">
+            <p>系統部署：江信宗<br>Vision Language Models</p>
+        </div>
+        """, unsafe_allow_html=True)
+    st.markdown('<div class="main-content">', unsafe_allow_html=True)
+    st.title("🌄 AI-Powered Visual Storytelling")
+    if api_key and uploaded_files:
+        if 'analyzed_files' not in st.session_state:
+            st.session_state.analyzed_files = {}
+        files_to_remove = set(st.session_state.analyzed_files.keys()) - set(f.name for f in uploaded_files)
+        for file_name in files_to_remove:
+            del st.session_state.analyzed_files[file_name]
+        for i in range(0, len(uploaded_files), 2):
+            img_col1, img_col2 = st.columns(2)
+            for j in range(2):
+                if i + j < len(uploaded_files):
+                    with img_col1 if j == 0 else img_col2:
+                        uploaded_file = uploaded_files[i + j]
+                        image = Image.open(uploaded_file)
+                        resized_image = resize_image(image)
+                        buffered = io.BytesIO()
+                        resized_image.save(buffered, format="PNG")
+                        img_str = base64.b64encode(buffered.getvalue()).decode()
+                        st.markdown(f"""
+                        <div class="custom-image-container">
+                            <img src="data:image/png;base64,{img_str}" class="custom-image">
+                        </div>
+                        <p style="text-align: center; color: black;">{uploaded_file.name}</p>
+                        """, unsafe_allow_html=True)
+                        if uploaded_file.name not in st.session_state.analyzed_files:
+                            with st.spinner("分析圖片及生成語音中..."):
+                                try:
+                                    description = analyze_image(image, api_key, model)
+                                    if language == "繁體中文":
+                                        with st.spinner("翻譯中..."):
+                                            description = translate_to_chinese(description, api_key, model)
+                                    st.session_state.analyzed_files[uploaded_file.name] = description
+                                    time.sleep(1)
+                                except Exception as e:
+                                    st.error(f"處理圖片時發生錯誤: {str(e)}")
+                                    continue
+                        description = st.session_state.analyzed_files[uploaded_file.name]
+                        paragraphs = [p.strip() for p in description.split('\n') if p.strip()]
+                        if paragraphs:
+                            formatted_description = ''.join([f'<p style="margin: 0;">{p}</p>' for p in paragraphs])
+                            st.markdown(f'<div class="description-box">{formatted_description}</div>', unsafe_allow_html=True)
+                            tts = gTTS(text=description, lang='zh-tw' if language == "繁體中文" else 'en')
+                            audio_file = f"audio_{uuid.uuid4()}.mp3"
+                            tts.save(audio_file)
+                            st.audio(audio_file)
+                            os.remove(audio_file)
+                        else:
+                            st.warning("無法獲取圖片描述。")
+    elif uploaded_files:
+        st.warning("請輸入有效的 API Key 以分析圖片。")
+    st.markdown('</div>', unsafe_allow_html=True)
+# Build the page only when this file is run as the entry script, not on import.
+if __name__ == "__main__":
+    main()

requirements.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ litellm
2	+ gTTS