jonathanagustin committed on
Commit
726d8f2
Β·
verified Β·
1 Parent(s): 077b103

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. README.md +18 -5
  2. app.py +63 -0
  3. requirements.txt +4 -0
README.md CHANGED
@@ -1,12 +1,25 @@
1
  ---
2
  title: Vision Chat
3
- emoji: ⚑
4
- colorFrom: purple
5
- colorTo: indigo
6
  sdk: gradio
7
- sdk_version: 6.0.2
8
  app_file: app.py
9
  pinned: false
 
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  title: Vision Chat
3
+ emoji: πŸ‘οΈ
4
+ colorFrom: yellow
5
+ colorTo: yellow
6
  sdk: gradio
7
+ sdk_version: "6.0.2"
8
  app_file: app.py
9
  pinned: false
10
+ license: mit
11
  ---
12
 
13
+ ## πŸ‘οΈ Vision Chat
14
+
15
+ Get captions and ask questions about images using BLIP and ViLT.
16
+
17
+ ## Features
18
+
19
+ - Automatic image captioning
20
+ - Visual question answering
21
+ - No model downloads - uses API
22
+
23
+ ## Setup
24
+
25
+ Add your `HF_TOKEN` as a Secret in Space Settings.
app.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

import gradio as gr
from huggingface_hub import InferenceClient

# Hugging Face API token, supplied via the Space's "Secrets" settings.
# Empty string when the secret is not configured.
HF_TOKEN = os.environ.get("HF_TOKEN", "")

# Use an authenticated client when a token is present; otherwise fall back
# to an anonymous client (rate-limited public inference).
if HF_TOKEN:
    client = InferenceClient(token=HF_TOKEN)
else:
    client = InferenceClient()
10
def caption_image(image):
    """Return a natural-language caption for *image*.

    The result is always a user-facing string: the generated caption on
    success, or an emoji-prefixed prompt/error message otherwise.
    """
    if image is None:
        return "πŸ“· Upload an image first!"

    try:
        # Chain straight through to the caption text; any API or parsing
        # failure falls into the handler below and is shown in the UI.
        return client.image_to_text(
            image,
            model="Salesforce/blip-image-captioning-base",
        ).generated_text
    except Exception as e:
        return f"❌ Error: {e}"
23
+
24
+
25
def answer_question(image, question: str):
    """Answer *question* about *image* using visual question answering.

    Returns a user-facing string: the highest-confidence answer with its
    score on success, or an emoji-prefixed prompt/error message otherwise.
    """
    # Guard clauses: both inputs are required before calling the API.
    if image is None:
        return "πŸ“· Upload an image first!"
    if not question.strip():
        return "❓ Ask a question!"

    try:
        answers = client.visual_question_answering(
            image=image,
            question=question,
            model="dandelin/vilt-b32-finetuned-vqa",
        )
        # Results arrive ranked; the first element is the best answer.
        best = answers[0]
        return f"πŸ€– {best.answer} (confidence: {best.score:.1%})"
    except Exception as e:
        return f"❌ Error: {e}"
42
+
43
+
44
# Build the two-column UI: image + caption button on the left,
# caption/QA widgets on the right. `demo` is the conventional name
# Gradio Spaces look for, so it is kept.
with gr.Blocks(title="Vision Chat") as demo:
    gr.Markdown("# πŸ‘οΈ Vision Chat\nUpload an image, get a caption, and ask questions about it!")

    with gr.Row(equal_height=True):
        with gr.Column(scale=1):
            image_input = gr.Image(type="pil", label="πŸ“· Your Image")
            caption_button = gr.Button("✨ Generate Caption", variant="primary")

        with gr.Column(scale=1):
            caption_box = gr.Textbox(label="Caption", lines=2, interactive=False)
            question_box = gr.Textbox(label="❓ Ask a question", placeholder="What color is the animal?")
            ask_button = gr.Button("Ask", variant="secondary")
            answer_box = gr.Textbox(label="Answer", lines=2, interactive=False)

    # Wire the controls to the inference helpers; submitting the question
    # textbox triggers the same handler as the Ask button.
    caption_button.click(caption_image, inputs=image_input, outputs=caption_box)
    ask_button.click(answer_question, inputs=[image_input, question_box], outputs=answer_box)
    question_box.submit(answer_question, inputs=[image_input, question_box], outputs=answer_box)

demo.queue()
demo.launch()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ gradio>=6.0.0
2
+ huggingface_hub>=0.23.0
3
+ pillow>=10.0.0
4
+