Spaces:

Claudz163
/

Image-To-Character-Text

Running

App Files Files Community

Claudz163 committed on Nov 1, 2024

Commit

f9d8d7e

1 Parent(s): 321df2e

added main

Browse files

Files changed (1) hide show

app.py +58 -45

app.py CHANGED Viewed

@@ -5,49 +5,62 @@ from PIL import Image
 import os
-api_key = os.getenv("HUGGINGFACE_TOKEN")
-client = InferenceClient(api_key=api_key)
-st.header("Character Captions (IN PROGRESS!)")
-st.write("Have a character caption any image you upload!")
-character = st.selectbox("Choose a character", ["rapper", "shrek", "unintelligible"])
-uploaded_img = st.file_uploader("Upload an image")
-if uploaded_img is not None:
-    image = Image.open(uploaded_img)
-    st.image(image)
-    image_captioner = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")
-    response = image_captioner(image)
-    caption = response[0]['generated_text']
-    character_prompts = {
-        "rapper": f"Describe this scene like you're a rapper: {caption}.",
-        "shrek": f"Describe this scene like you're Shrek: {caption}.",
-        "unintelligible": f"Describe this scene in a way that makes no sense: {caption}."
-    }
-    prompt = character_prompts[character]
-    messages = [
-        { "role": "user", "content": prompt }
-    ]
-    stream = client.chat.completions.create(
-        model="meta-llama/Llama-3.2-3B-Instruct",
-        messages=messages,
-        max_tokens=500,
-        stream=True
-    )
-    response = ''
-    for chunk in stream:
-        response += chunk.choices[0].delta.content
-    st.write(response)

 import os
+def initialize():
+    if 'initialized' not in st.session_state: # Initialize only once
+        print("Initializing...")
+        st.session_state['initialized'] = True
+        st.session_state['api_key'] = os.getenv("HUGGINGFACE_TOKEN")
+        st.session_state['client'] = InferenceClient(api_key=st.session_state['api_key'])
+def main():
+    initialize()
+    st.header("Character Captions")
+    st.write("Have a character caption any image you upload!")
+    character = st.selectbox("Choose a character", ["rapper", "shrek", "unintelligible", "cookie monster"])
+    uploaded_img = st.file_uploader("Upload an image")
+    if uploaded_img is not None:
+        # Open Image
+        image = Image.open(uploaded_img)
+        st.image(image)
+        # Get caption from image
+        image_captioner = pipeline("image-to-text", model="Salesforce/blip-image-captioning-large")
+        response = image_captioner(image)
+        caption = response[0]['generated_text']
+        # Pass the caption to a character prompt
+        character_prompts = {
+            "rapper": f"Describe this caption like you're a rapper: {caption}.",
+            "shrek": f"Describe this caption like you're Shrek: {caption}.",
+            "unintelligible": f"Describe this caption in a way that makes no sense: {caption}.",
+            "cookie monster": f"Describe this caption like you're cookie monster: {caption}."
+        }
+        prompt = character_prompts[character]
+        messages = [
+            { "role": "user", "content": prompt }
+        ]
+        # Pass to Llama for character output regarding image caption
+        stream = st.session_state['client'].chat.completions.create(
+            model="meta-llama/Llama-3.2-3B-Instruct",
+            messages=messages,
+            max_tokens=500,
+            stream=True
+        )
+        response = ''
+        for chunk in stream:
+            response += chunk.choices[0].delta.content
+        st.write(response)
+if __name__ == '__main__':
+    main()