arteeguz committed on
Commit
c4ceecb
·
1 Parent(s): 085b0f7

app.py added

Browse files
Files changed (3) hide show
  1. .gitignore +1 -0
  2. app.py +56 -0
  3. narrator.jpeg +0 -0
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .env
app.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dotenv import find_dotenv, load_dotenv
2
+ from transformers import pipeline
3
+ import os
4
+ import requests
5
+ import streamlit as st
6
+
7
# Load environment variables from the .env file located by find_dotenv().
load_dotenv(dotenv_path=find_dotenv())

# Token sent as the Bearer credential to the Hugging Face Inference API;
# None when the variable is not set.
HUGGINGFACE_API_TOKEN = os.environ.get("HUGGINGFACE_API_TOKEN")

# Image-captioning pipeline shared by img_to_text().
# NOTE(review): this is built at import time; under Streamlit the script is
# presumably re-executed on each interaction, so consider caching the model.
pipe = pipeline(task="image-to-text", model="Salesforce/blip-image-captioning-base")
11
+
12
+ #img to text
13
def img_to_text(url):
    """Return the caption generated for an image.

    Args:
        url: Image reference handed straight to the module-level ``pipe``
            captioning pipeline (the caller in this file passes a local
            file path).

    Returns:
        The ``"generated_text"`` field of the first result from ``pipe``.
    """
    results = pipe(url)
    caption = results[0]["generated_text"]
    # Echo the caption to stdout so it shows up in the server log.
    print(caption)
    return caption
17
+
18
def text_to_speech(message, timeout=60):
    """Synthesize speech for ``message`` and save it to ``audio.flac``.

    Posts ``message`` to the ``espnet/kan-bayashi_ljspeech_vits`` model on the
    Hugging Face Inference API, authenticating with ``HUGGINGFACE_API_TOKEN``,
    and writes the response body to ``audio.flac`` in the current working
    directory.

    Args:
        message: Text to convert to speech.
        timeout: Seconds to wait for the API before giving up. Defaults to 60.

    Raises:
        requests.HTTPError: If the API responds with a 4xx/5xx status.
        requests.Timeout: If the API does not answer within ``timeout``.
    """
    API_URL = "https://api-inference.huggingface.co/models/espnet/kan-bayashi_ljspeech_vits"
    headers = {"Authorization": f"Bearer {HUGGINGFACE_API_TOKEN}"}
    payload = {"inputs": message}

    # Without a timeout a stalled connection would block the app forever.
    response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)

    # An error response body is not audio; fail loudly instead of saving it
    # to audio.flac and handing the player an undecodable file.
    response.raise_for_status()

    with open("audio.flac", "wb") as audio_file:
        audio_file.write(response.content)
28
+
29
def main():
    """Run the Streamlit page: upload an image, caption it, and narrate it.

    Shows the ``narrator.jpeg`` banner and a file uploader. Once an image is
    uploaded it is displayed, captioned with ``img_to_text``, turned into
    speech with ``text_to_speech`` (which writes ``audio.flac``), and the
    caption and audio are rendered on the page.
    """
    import tempfile

    st.set_page_config(page_title="Image to Text", page_icon="🎙️")
    st.header("Image to Text")

    # Banner image, placed in the middle of three columns.
    _, centre_col, _ = st.columns(3)
    with centre_col:
        st.image(image="narrator.jpeg")

    uploaded_file = st.file_uploader("Choose an image: ", type=["jpg", "jpeg", "png"])
    if uploaded_file is None:
        return

    print(uploaded_file)
    st.image(uploaded_file, caption='Uploaded image', use_column_width=True)

    # Stage the upload in a temporary file rather than under the
    # client-supplied name: writing to uploaded_file.name in the working
    # directory could overwrite app assets (e.g. narrator.jpeg) and left
    # every upload on disk. Only the extension of the original name is kept.
    suffix = os.path.splitext(uploaded_file.name)[1]
    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
        tmp.write(uploaded_file.getvalue())
    try:
        scenario = img_to_text(tmp.name)
    finally:
        # Remove the staged copy whether or not captioning succeeded.
        os.remove(tmp.name)

    text_to_speech(scenario)

    # Label previously read "scenatio" (typo).
    with st.expander("scenario"):
        st.write(scenario)

    st.audio("audio.flac")
53
+
54
+
55
# Script entry point: build the page only when this file is executed directly.
if __name__ == "__main__":
    main()
narrator.jpeg ADDED