Fabrice-TIERCELIN committed on
Commit
a6f998e
·
verified ·
1 Parent(s): c9670e9

Upload 4 files

Browse files
Files changed (4) hide show
  1. README.md +7 -14
  2. app.py +0 -0
  3. infer.py +85 -85
  4. requirements.txt +21 -22
README.md CHANGED
@@ -1,21 +1,14 @@
1
  ---
2
- title: FramePack/HunyuanVideo
3
- emoji: 🎥
4
- colorFrom: pink
5
- colorTo: gray
6
  sdk: gradio
7
- sdk_version: 5.29.1
8
  app_file: app.py
 
9
  license: apache-2.0
10
- short_description: Text-to-Video/Image-to-Video/Video extender (timed prompt)
11
- tags:
12
- - Image-to-Video
13
- - Image-2-Video
14
- - Img-to-Vid
15
- - Img-2-Vid
16
- - language models
17
- - LLMs
18
- suggested_hardware: zero-a10g
19
  ---
20
 
21
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: RealESRGAN Pytorch
3
+ emoji: 🔥📹
4
+ colorFrom: indigo
5
+ colorTo: red
6
  sdk: gradio
7
+ sdk_version: 5.37.0
8
  app_file: app.py
9
+ pinned: true
10
  license: apache-2.0
11
+ short_description: User Friendly Image & Video Upscaler!
 
 
 
 
 
 
 
 
12
  ---
13
 
14
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
The diff for this file is too large to render. See raw diff
 
infer.py CHANGED
@@ -1,86 +1,86 @@
1
- from PIL import Image
2
- import cv2 as cv
3
- import torch
4
- from RealESRGAN import RealESRGAN
5
- import tempfile
6
- import numpy as np
7
- import tqdm
8
- import ffmpeg
9
- import spaces
10
-
11
-
12
- device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
13
-
14
- @spaces.GPU(duration=60)
15
- def infer_image(img: Image.Image, size_modifier: int ) -> Image.Image:
16
- if img is None:
17
- raise Exception("Image not uploaded")
18
-
19
- width, height = img.size
20
-
21
- if width >= 5000 or height >= 5000:
22
- raise Exception("The image is too large.")
23
-
24
- model = RealESRGAN(device, scale=size_modifier)
25
- model.load_weights(f'weights/RealESRGAN_x{size_modifier}.pth', download=False)
26
-
27
- result = model.predict(img.convert('RGB'))
28
- print(f"Image size ({device}): {size_modifier} ... OK")
29
- return result
30
-
31
- @spaces.GPU(duration=60)
32
- def infer_video(video_filepath: str, size_modifier: int) -> str:
33
- model = RealESRGAN(device, scale=size_modifier)
34
- model.load_weights(f'weights/RealESRGAN_x{size_modifier}.pth', download=False)
35
-
36
- cap = cv.VideoCapture(video_filepath)
37
-
38
- tmpfile = tempfile.NamedTemporaryFile(suffix='.mp4', delete=False)
39
- vid_output = tmpfile.name
40
- tmpfile.close()
41
-
42
- # Check if the input video has an audio stream
43
- probe = ffmpeg.probe(video_filepath)
44
- has_audio = any(stream['codec_type'] == 'audio' for stream in probe['streams'])
45
-
46
- if has_audio:
47
- # Extract audio from the input video
48
- audio_file = video_filepath.replace(".mp4", ".wav")
49
- ffmpeg.input(video_filepath).output(audio_file, format='wav', ac=1).run(overwrite_output=True)
50
-
51
- vid_writer = cv.VideoWriter(
52
- vid_output,
53
- fourcc=cv.VideoWriter.fourcc(*'mp4v'),
54
- fps=cap.get(cv.CAP_PROP_FPS),
55
- frameSize=(int(cap.get(cv.CAP_PROP_FRAME_WIDTH)) * size_modifier, int(cap.get(cv.CAP_PROP_FRAME_HEIGHT)) * size_modifier)
56
- )
57
-
58
- n_frames = int(cap.get(cv.CAP_PROP_FRAME_COUNT))
59
-
60
- for _ in tqdm.tqdm(range(n_frames)):
61
- ret, frame = cap.read()
62
- if not ret:
63
- break
64
-
65
- frame = cv.cvtColor(frame, cv.COLOR_BGR2RGB)
66
- frame = Image.fromarray(frame)
67
-
68
- upscaled_frame = model.predict(frame.convert('RGB'))
69
-
70
- upscaled_frame = np.array(upscaled_frame)
71
- upscaled_frame = cv.cvtColor(upscaled_frame, cv.COLOR_RGB2BGR)
72
-
73
- vid_writer.write(upscaled_frame)
74
-
75
- vid_writer.release()
76
-
77
- if has_audio:
78
- # Re-encode the video with the modified audio
79
- ffmpeg.input(vid_output).output(video_filepath.replace(".mp4", "_upscaled.mp4"), vcodec='libx264', acodec='aac', audio_bitrate='320k').run(overwrite_output=True)
80
-
81
- # Replace the original audio with the upscaled audio
82
- ffmpeg.input(audio_file).output(video_filepath.replace(".mp4", "_upscaled.mp4"), acodec='aac', audio_bitrate='320k').run(overwrite_output=True)
83
-
84
- print(f"Video file : {video_filepath}")
85
-
86
  return vid_output.replace(".mp4", "_upscaled.mp4") if has_audio else vid_output
 
1
+ from PIL import Image
2
+ import cv2 as cv
3
+ import torch
4
+ from RealESRGAN import RealESRGAN
5
+ import tempfile
6
+ import numpy as np
7
+ import tqdm
8
+ import ffmpeg
9
+ import spaces
10
+
11
+
12
+ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
13
+
14
+ @spaces.GPU(duration=60)
15
+ def infer_image(img: Image.Image, size_modifier: int ) -> Image.Image:
16
+ if img is None:
17
+ raise Exception("Image not uploaded")
18
+
19
+ width, height = img.size
20
+
21
+ if width >= 5000 or height >= 5000:
22
+ raise Exception("The image is too large.")
23
+
24
+ model = RealESRGAN(device, scale=size_modifier)
25
+ model.load_weights(f'weights/RealESRGAN_x{size_modifier}.pth', download=False)
26
+
27
+ result = model.predict(img.convert('RGB'))
28
+ print(f"Image size ({device}): {size_modifier} ... OK")
29
+ return result
30
+
31
+ @spaces.GPU(duration=120)
32
+ def infer_video(video_filepath: str, size_modifier: int) -> str:
33
+ model = RealESRGAN(device, scale=size_modifier)
34
+ model.load_weights(f'weights/RealESRGAN_x{size_modifier}.pth', download=False)
35
+
36
+ cap = cv.VideoCapture(video_filepath)
37
+
38
+ tmpfile = tempfile.NamedTemporaryFile(suffix='.mp4', delete=False)
39
+ vid_output = tmpfile.name
40
+ tmpfile.close()
41
+
42
+ # Check if the input video has an audio stream
43
+ probe = ffmpeg.probe(video_filepath)
44
+ has_audio = any(stream['codec_type'] == 'audio' for stream in probe['streams'])
45
+
46
+ if has_audio:
47
+ # Extract audio from the input video
48
+ audio_file = video_filepath.replace(".mp4", ".wav")
49
+ ffmpeg.input(video_filepath).output(audio_file, format='wav', ac=1).run(overwrite_output=True)
50
+
51
+ vid_writer = cv.VideoWriter(
52
+ vid_output,
53
+ fourcc=cv.VideoWriter.fourcc(*'mp4v'),
54
+ fps=cap.get(cv.CAP_PROP_FPS),
55
+ frameSize=(int(cap.get(cv.CAP_PROP_FRAME_WIDTH)) * size_modifier, int(cap.get(cv.CAP_PROP_FRAME_HEIGHT)) * size_modifier)
56
+ )
57
+
58
+ n_frames = int(cap.get(cv.CAP_PROP_FRAME_COUNT))
59
+
60
+ for _ in tqdm.tqdm(range(n_frames)):
61
+ ret, frame = cap.read()
62
+ if not ret:
63
+ break
64
+
65
+ frame = cv.cvtColor(frame, cv.COLOR_BGR2RGB)
66
+ frame = Image.fromarray(frame)
67
+
68
+ upscaled_frame = model.predict(frame.convert('RGB'))
69
+
70
+ upscaled_frame = np.array(upscaled_frame)
71
+ upscaled_frame = cv.cvtColor(upscaled_frame, cv.COLOR_RGB2BGR)
72
+
73
+ vid_writer.write(upscaled_frame)
74
+
75
+ vid_writer.release()
76
+
77
+ if has_audio:
78
+ # Re-encode the video with the modified audio
79
+ ffmpeg.input(vid_output).output(video_filepath.replace(".mp4", "_upscaled.mp4"), vcodec='libx264', acodec='aac', audio_bitrate='320k').run(overwrite_output=True)
80
+
81
+ # Replace the original audio with the upscaled audio
82
+ ffmpeg.input(audio_file).output(video_filepath.replace(".mp4", "_upscaled.mp4"), acodec='aac', audio_bitrate='320k').run(overwrite_output=True)
83
+
84
+ print(f"Video file : {video_filepath}")
85
+
86
  return vid_output.replace(".mp4", "_upscaled.mp4") if has_audio else vid_output
requirements.txt CHANGED
@@ -1,23 +1,22 @@
1
- accelerate==1.7.0
2
- diffusers==0.33.1
3
- transformers==4.52.4
4
- sentencepiece==0.2.0
5
- pillow==11.2.1
6
- av==12.1.0
7
- numpy==1.26.2
8
- scipy==1.12.0
9
- requests==2.32.4
10
- torchsde==0.2.6
11
- torch>=2.0.0
12
  torchvision
13
- torchaudio
14
- einops
15
- opencv-contrib-python
16
- safetensors
17
- huggingface_hub
18
- decord
19
- imageio_ffmpeg==0.6.0
20
- sageattention==1.0.6
21
- xformers==0.0.29.post3
22
- bitsandbytes==0.46.0
23
- pillow-heif==0.22.0
 
 
 
1
+ pydantic==2.10.6 # To avoid the message "No API found" or "Internal server error"
2
+
3
+ gradio==4.28.3
4
+ torch
5
+ numpy
6
+ opencv-python-headless
7
+ setuptools
8
+ Pillow
 
 
 
9
  torchvision
10
+ addict
11
+ future
12
+ lmdb
13
+ pyyaml
14
+ requests
15
+ scikit-image
16
+ scipy
17
+ tb-nightly
18
+ tqdm
19
+ yapf
20
+ psutil
21
+ ffmpeg-python
22
+ huggingface_hub