NishantD committed on
Commit
aca6900
·
verified ·
1 Parent(s): 4129883

Upload 5 files

Browse files
Files changed (5) hide show
  1. LICENSE +21 -0
  2. README.md +2 -12
  3. api.py +44 -0
  4. app.py +60 -0
  5. requirements.txt +8 -0
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2023 AI Anytime
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md CHANGED
@@ -1,12 +1,2 @@
1
- ---
2
- title: ImageTest
3
- emoji: 🐨
4
- colorFrom: indigo
5
- colorTo: purple
6
- sdk: streamlit
7
- sdk_version: 1.38.0
8
- app_file: app.py
9
- pinned: false
10
- ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ # Visual-Question-Answering-API-and-App
2
+ Visual Question Answering API and App using ViLT, FastAPI, and Streamlit.
 
 
 
 
 
 
 
 
 
 
api.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, File, UploadFile
2
+ from fastapi.responses import JSONResponse, RedirectResponse
3
+ from transformers import ViltProcessor, ViltForQuestionAnswering
4
+ from PIL import Image
5
+ import requests
6
+ import io
7
+
8
# ASGI application object; routes below are registered on it.
app = FastAPI(
    title="Visual Question and Answering API",
    version="0.0.1",
)

# Load the ViLT processor and the fine-tuned VQA model once at import time so
# that every request reuses the same instances.
processor = ViltProcessor.from_pretrained(
    "dandelin/vilt-b32-finetuned-vqa"
)
model = ViltForQuestionAnswering.from_pretrained(
    "dandelin/vilt-b32-finetuned-vqa"
)
13
+
14
def get_answer(image, text):
    """Answer a natural-language question about an image using the ViLT model.

    Args:
        image: Encoded image bytes in any format PIL can open.
        text: The question to ask about the image.

    Returns:
        The label string of the highest-scoring answer class.

    Raises:
        Exception: Any failure while decoding the image or running the model
            is propagated unchanged. Previously the error text was returned
            as if it were the answer, which made failures indistinguishable
            from real answers and left the endpoint's own error handler
            unreachable.
    """
    # Local import: this module does not import torch at file level.
    import torch

    # Decode the bytes and force 3-channel RGB before preprocessing.
    img = Image.open(io.BytesIO(image)).convert("RGB")

    # Build the joint image + question tensors expected by the model.
    encoding = processor(img, text, return_tensors="pt")

    # Pure inference: skip gradient tracking to save memory and time.
    with torch.no_grad():
        outputs = model(**encoding)

    # Highest-scoring class index -> human-readable answer label.
    idx = outputs.logits.argmax(-1).item()
    return model.config.id2label[idx]
32
+
33
@app.get("/", include_in_schema=False)
async def index():
    """Redirect requests for the root path to the ``/docs`` page."""
    docs_redirect = RedirectResponse(url="/docs")
    return docs_redirect
36
+
37
@app.post("/answer")
async def process_image(image: UploadFile = File(...), text: str = None):
    """Answer a question about an uploaded image.

    Args:
        image: The uploaded image file (multipart form field).
        text: The question to ask about the image. It defaults to ``None``
            for backward compatibility, but a non-blank value is required.

    Returns:
        ``{"Answer": <label>}`` on success (HTTP 200); a JSON error body with
        HTTP 400 when the question is missing or blank; a JSON error body
        with HTTP 500 when processing fails.
    """
    # Reject a missing/blank question up front instead of handing None to
    # the model and reporting whatever error text comes back.
    if text is None or not text.strip():
        return JSONResponse(
            {"Error": "A non-empty 'text' question is required."},
            status_code=400,
        )

    try:
        answer = get_answer(await image.read(), text)
    except Exception as e:
        # Same error body as before, but no longer reported as HTTP 200.
        return JSONResponse(
            {"Sorry, please reach out to the Admin!": str(e)},
            status_code=500,
        )

    return JSONResponse({"Answer": answer})
app.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from PIL import Image
3
+ import requests
4
+ from io import BytesIO
5
+ from transformers import ViltProcessor, ViltForQuestionAnswering
6
+
7
# Set page layout to wide. Kept ahead of every other Streamlit call in this
# script.
st.set_page_config(layout="wide")


@st.cache_resource
def _load_vqa_model():
    """Load the ViLT processor and VQA model, cached across script reruns.

    Streamlit re-executes this script on every widget interaction; without
    caching, the model would be loaded again on each rerun.

    Returns:
        A ``(processor, model)`` tuple.
    """
    vilt_processor = ViltProcessor.from_pretrained(
        "dandelin/vilt-b32-finetuned-vqa"
    )
    vilt_model = ViltForQuestionAnswering.from_pretrained(
        "dandelin/vilt-b32-finetuned-vqa"
    )
    return vilt_processor, vilt_model


# Module-level names kept so the rest of the script can use them unchanged.
processor, model = _load_vqa_model()
12
+
13
def get_answer(image, text):
    """Answer a natural-language question about an image using the ViLT model.

    Args:
        image: Encoded image bytes in any format PIL can open.
        text: The question to ask about the image.

    Returns:
        The label string of the highest-scoring answer class. If anything
        fails, the exception message is returned as a string instead; this
        best-effort behaviour is kept because the UI code displays the
        returned value directly and has no exception handling of its own.
    """
    # Local import: this module does not import torch at file level.
    import torch

    try:
        # Decode the bytes and force 3-channel RGB before preprocessing.
        img = Image.open(BytesIO(image)).convert("RGB")

        # Build the joint image + question tensors expected by the model.
        encoding = processor(img, text, return_tensors="pt")

        # Pure inference: skip gradient tracking to save memory and time.
        with torch.no_grad():
            outputs = model(**encoding)

        # Highest-scoring class index -> human-readable answer label.
        idx = outputs.logits.argmax(-1).item()
        return model.config.id2label[idx]

    except Exception as e:
        return str(e)
31
+
32
# Set up the Streamlit app
st.title("Visual Question Answering")
st.write("Upload an image and enter a question to get an answer.")

# Create columns for image upload and input fields
col1, col2 = st.columns(2)

# Image upload
with col1:
    uploaded_file = st.file_uploader("Upload Image", type=["jpg", "jpeg", "png"])
    # Nothing is uploaded on the first render; only draw a preview once a
    # file exists instead of handing None to st.image.
    if uploaded_file is not None:
        st.image(uploaded_file, use_column_width=True)

# Question input
with col2:
    question = st.text_input("Question")

# Process the image and question when both are provided. An empty text box
# yields "" rather than None, so test for real (non-blank) content.
if uploaded_file is not None and question and question.strip():
    if st.button("Ask Question"):
        # Pass the uploaded bytes through untouched. Re-encoding as JPEG
        # fails for PNGs with an alpha channel and is lossy; get_answer
        # already decodes the bytes and converts them to RGB.
        image_bytes = uploaded_file.getvalue()

        # Get the answer
        answer = get_answer(image_bytes, question)

        # Display the answer
        st.success("Answer: " + answer)
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ transformers
2
+ torch
3
+ requests
4
+ Pillow
5
+ fastapi
6
+ uvicorn
7
+ streamlit
8
+ python-multipart