Spaces:
Runtime error
Runtime error
Merge branch 'main' of https://github.com/gamingflexer/Catalog-Digitization-
Browse files- src/module/audio_text.py +49 -0
- src/module/image_enhance.py +34 -5
src/module/audio_text.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# from whisper_jax import FlaxWhisperPipline
|
| 2 |
+
# import jax.numpy as jnp
|
| 3 |
+
import whisper
|
| 4 |
+
print(whisper.__file__)
|
| 5 |
+
from openai import OpenAI
|
| 6 |
+
from config import OPENAI_API_KEY
|
| 7 |
+
import os
|
| 8 |
+
|
| 9 |
+
# Export the key BEFORE building the client: OpenAI() looks up
# OPENAI_API_KEY when it is constructed, so the original order
# (client first, environment second) failed whenever the variable was not
# already set by the host process.
os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY

# Pass the key explicitly as well so the client does not depend on the
# environment lookup at all.
client = OpenAI(api_key=OPENAI_API_KEY)
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def whisper_pipeline_tpu(audio):
    """Transcribe audio with the JAX/Flax Whisper ``large-v3`` pipeline.

    Args:
        audio: Input handed unchanged to the pipeline call.
            NOTE(review): presumably a path or audio array accepted by
            whisper_jax -- confirm against the caller.

    Returns:
        Whatever the pipeline call returns for ``audio``.
    """
    # The module-level imports of these two names are commented out, so the
    # original body raised NameError on every call. Importing here keeps the
    # optional JAX stack out of module import while making this path usable.
    from whisper_jax import FlaxWhisperPipline
    import jax.numpy as jnp

    pipeline = FlaxWhisperPipline(
        "openai/whisper-large-v3",
        dtype=jnp.bfloat16,
        batch_size=16,
    )
    return pipeline(audio)
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def whisper_pipeline(audio_path):
    """Transcribe an audio file with the local Whisper ``medium`` model.

    The audio is padded or trimmed to a single 30-second window before
    decoding. The detected language and the recognized text are printed.

    Args:
        audio_path: Path of the audio file to load.

    Returns:
        The recognized text.
    """
    model = whisper.load_model("medium")

    # Load the waveform and force it to the 30-second window.
    samples = whisper.pad_or_trim(whisper.load_audio(audio_path))

    # Log-Mel spectrogram, placed on the same device as the model.
    mel = whisper.log_mel_spectrogram(samples).to(model.device)

    # Language detection: report the most probable language.
    _, language_probs = model.detect_language(mel)
    detected = max(language_probs, key=language_probs.get)
    print(f"Detected language: {detected}")

    # Decode with default options and echo the transcript.
    decoded = whisper.decode(model, mel, whisper.DecodingOptions())
    transcript = decoded.text
    print(transcript)
    return transcript
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def whisper_openai(audio_path):
    """Transcribe an audio file with the hosted OpenAI ``whisper-1`` model.

    Args:
        audio_path: Path of the audio file to upload.

    Returns:
        The object returned by ``client.audio.transcriptions.create``.
    """
    # The context manager closes the handle even if the request raises; the
    # original opened the file and never closed it.
    with open(audio_path, "rb") as audio_file:
        return client.audio.transcriptions.create(
            model="whisper-1",
            file=audio_file,
        )
|
| 48 |
+
|
| 49 |
+
if __name__ == "__main__":
    # whisper_pipeline requires an audio path; the original bare call passed
    # none and raised TypeError as soon as the module was imported. Run it
    # only as a script, with the path taken from the command line.
    import sys

    if len(sys.argv) != 2:
        raise SystemExit("usage: python audio_text.py <audio_path>")
    whisper_pipeline(sys.argv[1])
|
src/module/image_enhance.py
CHANGED
|
@@ -2,6 +2,8 @@ import cv2
|
|
| 2 |
import os
|
| 3 |
from config import file_Directory
|
| 4 |
import numpy as np
|
|
|
|
|
|
|
| 5 |
class Image_Enhance():
|
| 6 |
|
| 7 |
def __init__(self, image_path) -> None:
|
|
@@ -11,9 +13,9 @@ class Image_Enhance():
|
|
| 11 |
# Load the image
|
| 12 |
image = cv2.imread(self.image_path)
|
| 13 |
#Plot the original image
|
| 14 |
-
alpha = 1.
|
| 15 |
# control brightness by 50
|
| 16 |
-
beta =
|
| 17 |
image2 = cv2.convertScaleAbs(image, alpha=alpha, beta=beta)
|
| 18 |
#Save the image
|
| 19 |
# imagepth = os.path.join(os.path.dirname(self.image_path), 'Brightness & contrast.jpg')
|
|
@@ -21,6 +23,32 @@ class Image_Enhance():
|
|
| 21 |
cv2.imwrite(imagepth, image2)
|
| 22 |
return imagepth
|
| 23 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
def sharpen(self, imagepth):
|
| 25 |
image = cv2.imread(imagepth)
|
| 26 |
# Create the sharpening kernel
|
|
@@ -74,9 +102,10 @@ class Image_Enhance():
|
|
| 74 |
cv2.imwrite(imagepath, image2)
|
| 75 |
|
| 76 |
|
| 77 |
-
obj = Image_Enhance(r"
|
| 78 |
pth = obj.brightness_Adjust()
|
| 79 |
sharpen = obj.sharpen(pth)
|
| 80 |
lapacian_sharpen = obj.lapacian_sharpen(sharpen)
|
| 81 |
-
noise = obj.removing_noise(
|
| 82 |
-
obj.enhance_color(noise)
|
|
|
|
|
|
| 2 |
import os
|
| 3 |
from config import file_Directory
|
| 4 |
import numpy as np
|
| 5 |
+
from PIL import Image
|
| 6 |
+
|
| 7 |
class Image_Enhance():
|
| 8 |
|
| 9 |
def __init__(self, image_path) -> None:
|
|
|
|
| 13 |
# Load the image
|
| 14 |
image = cv2.imread(self.image_path)
|
| 15 |
#Plot the original image
|
| 16 |
+
alpha = -1.1
|
| 17 |
# control brightness: beta is the offset passed to convertScaleAbs
|
| 18 |
+
beta = 70
|
| 19 |
image2 = cv2.convertScaleAbs(image, alpha=alpha, beta=beta)
|
| 20 |
#Save the image
|
| 21 |
# imagepth = os.path.join(os.path.dirname(self.image_path), 'Brightness & contrast.jpg')
|
|
|
|
| 23 |
cv2.imwrite(imagepth, image2)
|
| 24 |
return imagepth
|
| 25 |
|
| 26 |
+
def remove_flash(self, imagepth):
    """Mask out flash glare in an image and save the result.

    A keep-mask is built from two sources: an inverted adaptive threshold of
    the grayscale image, and an inverted fixed threshold (240) of a blurred
    copy. The two are OR-ed, morphologically closed, and applied to the
    original image; pixels outside the mask come out black.

    Args:
        imagepth: Path of the image to read.

    Returns:
        Path of the written file, ``remove_flash.jpg`` inside
        ``file_Directory``. Returned so the result can be chained like the
        other enhancement steps; the original returned nothing.
    """
    image = cv2.imread(imagepth)

    # Work on a single-channel grayscale copy for thresholding.
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Apply adaptive thresholding to segment the text
    thresh = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV, 11, 4
    )

    # Apply Gaussian blur to the grayscale image to reduce noise
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)

    # Inverted threshold: pixels brighter than 240 (treated as glare) are
    # excluded from the mask, everything else is kept.
    _, mask = cv2.threshold(blurred, 240, 255, cv2.THRESH_BINARY_INV)

    # Combine the text and glare masks
    mask = cv2.bitwise_or(mask, thresh)

    # Apply morphological closing to further remove small areas of glare
    kernel = np.ones((5, 5), np.uint8)
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)

    # Apply the mask to the original image to remove flashlight glare
    result = cv2.bitwise_and(image, image, mask=mask)

    output_path = os.path.join(file_Directory, 'remove_flash.jpg')
    cv2.imwrite(output_path, result)
    return output_path
|
| 51 |
+
|
| 52 |
def sharpen(self, imagepth):
|
| 53 |
image = cv2.imread(imagepth)
|
| 54 |
# Create the sharpening kernel
|
|
|
|
| 102 |
cv2.imwrite(imagepath, image2)
|
| 103 |
|
| 104 |
|
| 105 |
+
if __name__ == "__main__":
    # Demo run of the enhancement steps on one sample product image.
    # Guarded so that importing Image_Enhance from another module does not
    # process the hard-coded sample path as a side effect.
    obj = Image_Enhance(r"data/Catalog Digitization/ONDC Test Data _ Images/Product Images/Bru_Instant_Coffee_Powder.png")
    pth = obj.brightness_Adjust()
    sharpen = obj.sharpen(pth)
    lapacian_sharpen = obj.lapacian_sharpen(sharpen)
    noise = obj.removing_noise(sharpen)
    obj.enhance_color(noise)
    obj.remove_flash(sharpen)
|