Spaces:

VDNT11
/

AIML_project

Sleeping

App Files Files Community

VDNT11 committed on Nov 22, 2024

Commit

ba70d3f

verified ·

1 Parent(s): 977a686

Update app.py

Browse files

Files changed (1) hide show

app.py +42 -32

app.py CHANGED Viewed

@@ -1,30 +1,50 @@
 import streamlit as st
 import torch
 import librosa
 import matplotlib.pyplot as plt
 from PIL import Image
-import os
-# Import the required functions and classes from your previous code
-from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
 import torchaudio
-import torch
 from transformers import (
     AutoModelForSeq2SeqLM,
     AutoTokenizer,
 )
-from IndicTransToolkit import IndicProcessor
-from transformers import BitsAndBytesConfig
-from diffusers import StableDiffusionPipeline, EulerDiscreteScheduler
-from diffusers import StableDiffusionImg2ImgPipeline
 import stanza
-# Ensure you have the same TransGen class and other supporting functions from your previous implementation
 class TransGen:
-    def __init__(self, translation_model="ai4bharat/indictrans2-indic-en-1B",
-                 stable_diff_model="stabilityai/stable-diffusion-2-base",
-                 src_lang='hin_Deva', tgt_lang='eng_Latn'):
-        # Same implementation as in your previous code
         self.bnb_config = BitsAndBytesConfig(load_in_4bit=True)
         self.tokenizer = AutoTokenizer.from_pretrained(translation_model, trust_remote_code=True)
         self.model = AutoModelForSeq2SeqLM.from_pretrained(translation_model, trust_remote_code=True, quantization_config=self.bnb_config)
@@ -40,7 +60,6 @@ class TransGen:
         self.img2img_pipe = self.img2img_pipe.to('cuda')
     def translate(self, input_sentences):
-        # Same implementation as in your previous code
         batch = self.ip.preprocess_batch(
             input_sentences,
             src_lang=self.src_lang,
@@ -72,11 +91,9 @@ class TransGen:
             )
         translations = self.ip.postprocess_batch(generated_tokens, lang=self.tgt_lang)
         return translations
     def generate_image(self, prompt, prev_image, strength=1.0, guidance_scale=7.5):
-        # Same implementation as in your previous code
         strength = float(strength) if strength is not None else 1.0
         guidance_scale = float(guidance_scale) if guidance_scale is not None else 7.5
@@ -96,18 +113,12 @@ class TransGen:
         return image.images[0]
     def run(self, input_sentences, strength, guidance_scale, prev_image=None):
-        # Same implementation as in your previous code
         translations = self.translate(input_sentences)
         sentence = translations[0]
         image = self.generate_image(sentence, prev_image, strength, guidance_scale)
         return sentence, image
-# Initialize global variables
-stanza.download('hi')
-transgen = TransGen()
 def transcribe_audio_to_hindi(audio_path: str) -> str:
-    # Same implementation as in your previous code
     device = "cuda:0" if torch.cuda.is_available() else "cpu"
     torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
@@ -138,25 +149,23 @@ def transcribe_audio_to_hindi(audio_path: str) -> str:
     result = whisper_pipe(waveform.squeeze(0).cpu().numpy(), return_timestamps=True)
     return result["text"]
 nlp = stanza.Pipeline(lang='hi', processors='tokenize,pos')
-def POS_policy(input):
-    # Same implementation as in your previous code
-    lst = input
-    doc = nlp(lst)
     words = doc.sentences[-1].words
     n = len(words)
     i = n-1
-    while(i):
-        if words[i].upos == 'NOUN' or words[i].upos == 'VERB':
             return i
-        else:
-            pass
         i -= 1
     return 0
 def generate_images_from_audio(audio_path, base_strength=0.8, base_guidance_scale=12):
-    # Similar implementation with modifications for Streamlit
     text_tot = transcribe_audio_to_hindi(audio_path)
     st.write(f'Transcripted sentence: {text_tot}')
@@ -164,6 +173,7 @@ def generate_images_from_audio(audio_path, base_strength=0.8, base_guidance_scal
     cur_sent = ''
     prev_idx = 0
     generated_images = []
     for word in text_tot.split():
         cur_sent += word + ' '

+import os
+import subprocess
+import sys
+# Clone required repositories
def clone_repositories():
    """Clone the helper repositories and make them importable.

    Each repository is cloned with ``git clone`` into a fixed directory
    name, resolved relative to the current working directory, but only
    when that directory does not already exist.

    The absolute path of every repository directory is then appended to
    ``sys.path`` whether or not a clone just happened. Registering the
    path only after a fresh clone would leave it missing on any later
    start where the checkout is already on disk.

    Raises:
        subprocess.CalledProcessError: If ``git clone`` exits with a
            non-zero status.
    """
    repos = [
        ('https://github.com/AI4Bharat/IndicTrans2.git', 'indictrans2'),
        ('https://github.com/VarunGumma/IndicTransToolkit.git', 'indictranstoolkit'),
    ]
    for repo_url, repo_dir in repos:
        if not os.path.exists(repo_dir):
            subprocess.check_call(['git', 'clone', repo_url, repo_dir])
        # Register the checkout on every run, not only right after cloning,
        # and avoid piling up duplicate entries on repeated calls.
        repo_path = os.path.abspath(repo_dir)
        if repo_path not in sys.path:
            sys.path.append(repo_path)
+# Clone repositories before importing
+clone_repositories()
 import streamlit as st
 import torch
 import librosa
 import matplotlib.pyplot as plt
 from PIL import Image
 import torchaudio
 from transformers import (
+    AutoModelForSpeechSeq2Seq,
+    AutoProcessor,
+    pipeline,
     AutoModelForSeq2SeqLM,
     AutoTokenizer,
+    BitsAndBytesConfig
 )
+from diffusers import StableDiffusionPipeline, EulerDiscreteScheduler, StableDiffusionImg2ImgPipeline
 import stanza
+import numpy as np
+from indictranstoolkit import IndicProcessor
 class TransGen:
+    def __init__(
+        self,
+        translation_model="ai4bharat/indictrans2-indic-en-1B",
+        stable_diff_model="stabilityai/stable-diffusion-2-base",
+        src_lang='hin_Deva',
+        tgt_lang='eng_Latn'
+    ):
         self.bnb_config = BitsAndBytesConfig(load_in_4bit=True)
         self.tokenizer = AutoTokenizer.from_pretrained(translation_model, trust_remote_code=True)
         self.model = AutoModelForSeq2SeqLM.from_pretrained(translation_model, trust_remote_code=True, quantization_config=self.bnb_config)
         self.img2img_pipe = self.img2img_pipe.to('cuda')
     def translate(self, input_sentences):
         batch = self.ip.preprocess_batch(
             input_sentences,
             src_lang=self.src_lang,
             )
         translations = self.ip.postprocess_batch(generated_tokens, lang=self.tgt_lang)
         return translations
     def generate_image(self, prompt, prev_image, strength=1.0, guidance_scale=7.5):
         strength = float(strength) if strength is not None else 1.0
         guidance_scale = float(guidance_scale) if guidance_scale is not None else 7.5
         return image.images[0]
     def run(self, input_sentences, strength, guidance_scale, prev_image=None):
         translations = self.translate(input_sentences)
         sentence = translations[0]
         image = self.generate_image(sentence, prev_image, strength, guidance_scale)
         return sentence, image
 def transcribe_audio_to_hindi(audio_path: str) -> str:
     device = "cuda:0" if torch.cuda.is_available() else "cpu"
     torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
     result = whisper_pipe(waveform.squeeze(0).cpu().numpy(), return_timestamps=True)
     return result["text"]
+# Download Stanza resources
+stanza.download('hi')
 nlp = stanza.Pipeline(lang='hi', processors='tokenize,pos')
def POS_policy(input_text):
    """Return the index of the last NOUN or VERB in the final sentence.

    ``input_text`` is run through the module-level Stanza pipeline
    ``nlp``. Only the words of the last sentence in the resulting
    document are inspected, scanning from the end towards the start.

    Returns:
        The position, within the last sentence's word list, of the
        right-most word whose ``upos`` is ``'NOUN'`` or ``'VERB'``.
        Returns 0 when no such word exists, so a result of 0 does not
        distinguish "match at index 0" from "no match".
    """
    last_sentence_words = nlp(input_text).sentences[-1].words
    # Walk the indices backwards so the first hit is the right-most match.
    for idx in range(len(last_sentence_words) - 1, -1, -1):
        if last_sentence_words[idx].upos in ('NOUN', 'VERB'):
            return idx
    return 0
 def generate_images_from_audio(audio_path, base_strength=0.8, base_guidance_scale=12):
     text_tot = transcribe_audio_to_hindi(audio_path)
     st.write(f'Transcripted sentence: {text_tot}')
     cur_sent = ''
     prev_idx = 0
     generated_images = []
+    transgen = TransGen()
     for word in text_tot.split():
         cur_sent += word + ' '