dahyedahye committed on
Commit
4d0661a
·
1 Parent(s): 9dc0ed2
Files changed (4) hide show
  1. .env.backup +0 -2
  2. app.py +2 -27
  3. app.py.backup +0 -126
  4. env_backup +0 -2
.env.backup DELETED
@@ -1,2 +0,0 @@
1
- SMTP_USER=kyowoon.lee1924@gmail.com
2
- SMTP_PASSWORD=<REDACTED>
 
 
 
app.py CHANGED
@@ -3,9 +3,6 @@ import re
3
  import uuid
4
  import gdown
5
  import whisper
6
- import smtplib
7
- from email.mime.text import MIMEText
8
- from email.mime.multipart import MIMEMultipart
9
  from concurrent.futures import ThreadPoolExecutor
10
  from fastapi import FastAPI, HTTPException, BackgroundTasks
11
  from fastapi.responses import JSONResponse
@@ -29,7 +26,7 @@ app = FastAPI(
29
  )
30
 
31
  # Whisper 모델 로드
32
- model = whisper.load_model("tiny")
33
 
34
  postmark = PostmarkClient(server_token=os.getenv("POSTMARK_API_KEY"))
35
 
@@ -51,31 +48,9 @@ def extract_file_id(drive_url: str) -> str:
51
  raise ValueError("Invalid Google Drive URL")
52
 
53
  def send_email(to_email: str, srt_file_path: str, transcription_time: float):
54
- # smtp_server = "smtp.gmail.com"
55
- # smtp_port = 587
56
- # smtp_user = os.getenv("SMTP_USER")
57
- # smtp_password = os.getenv("SMTP_PASSWORD")
58
-
59
  subject = "[kyobody - 자막생성] 작업이 완료되었습니다."
60
  body = f"[kyobody - 자막생성] 작업이 완료되었습니다. 총 소요 시간: {transcription_time:.2f} 초. SRT 파일을 첨부하여 전달드립니다."
61
 
62
- # msg = MIMEMultipart()
63
- # msg["From"] = smtp_user
64
- # msg["To"] = to_email
65
- # msg["Subject"] = subject
66
-
67
- # msg.attach(MIMEText(body, "plain"))
68
-
69
- # with open(srt_file_path, "r") as file:
70
- # attachment = MIMEText(file.read())
71
- # attachment.add_header("Content-Disposition", "attachment", filename=os.path.basename(srt_file_path))
72
- # msg.attach(attachment)
73
-
74
- # with smtplib.SMTP(smtp_server, smtp_port) as server:
75
- # server.starttls()
76
- # server.login(smtp_user, smtp_password)
77
- # server.sendmail(smtp_user, to_email, msg.as_string())
78
-
79
  email = postmark.emails.Email(
80
  From=os.getenv("FROM_EMAIL"),
81
  To=to_email,
@@ -120,7 +95,7 @@ def transcribe_and_send_email(temp_input_file: str, srt_file_path: str, email: s
120
  raise e
121
 
122
  @app.post("/transcribe/")
123
- def transcribe_video(url: str, email: str, background_tasks: BackgroundTasks, language: str = "en"):
124
  try:
125
  # Extract file ID and download the file
126
  file_id = extract_file_id(url)
 
3
  import uuid
4
  import gdown
5
  import whisper
 
 
 
6
  from concurrent.futures import ThreadPoolExecutor
7
  from fastapi import FastAPI, HTTPException, BackgroundTasks
8
  from fastapi.responses import JSONResponse
 
26
  )
27
 
28
  # Whisper 모델 로드
29
+ model = whisper.load_model("large-v2")
30
 
31
  postmark = PostmarkClient(server_token=os.getenv("POSTMARK_API_KEY"))
32
 
 
48
  raise ValueError("Invalid Google Drive URL")
49
 
50
  def send_email(to_email: str, srt_file_path: str, transcription_time: float):
 
 
 
 
 
51
  subject = "[kyobody - 자막생성] 작업이 완료되었습니다."
52
  body = f"[kyobody - 자막생성] 작업이 완료되었습니다. 총 소요 시간: {transcription_time:.2f} 초. SRT 파일을 첨부하여 전달드립니다."
53
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  email = postmark.emails.Email(
55
  From=os.getenv("FROM_EMAIL"),
56
  To=to_email,
 
95
  raise e
96
 
97
  @app.post("/transcribe/")
98
+ def transcribe_video(url: str, email: str, background_tasks: BackgroundTasks, language: str = "ko"):
99
  try:
100
  # Extract file ID and download the file
101
  file_id = extract_file_id(url)
app.py.backup DELETED
@@ -1,126 +0,0 @@
1
- import os
2
- import re
3
- import uuid
4
- import gdown
5
- import whisper
6
- import smtplib
7
- from email.mime.text import MIMEText
8
- from email.mime.multipart import MIMEMultipart
9
- from concurrent.futures import ThreadPoolExecutor
10
- from fastapi import FastAPI, HTTPException, BackgroundTasks
11
- from fastapi.responses import JSONResponse
12
- from dotenv import load_dotenv
13
-
14
- # 구글 드라이브 링크, 어떤 언어, 받을 이메일 주소
15
-
16
- # .env 파일에서 중요한 환경 변수 로드
17
- load_dotenv()
18
-
19
- app = FastAPI(
20
- version="0.0.1",
21
- servers=[
22
- {
23
- "url": "https://leekwoon-whisper-api.hf.space",
24
- "description": "video/audio transcription API",
25
- }
26
- ],
27
- )
28
-
29
- # Whisper 모델 로드
30
- model = whisper.load_model("tiny")
31
-
32
- executor = ThreadPoolExecutor(max_workers=3) # 최대 3개의 스레드로 비동기 작업 처리
33
-
34
- def extract_file_id(drive_url: str) -> str:
35
- """
36
- Google Drive URL에서 파일 ID를 추출합니다.
37
- """
38
- match = re.search(r'/d/([a-zA-Z0-9_-]+)', drive_url)
39
- if match:
40
- return match.group(1)
41
- match = re.search(r'file/d/([a-zA-Z0-9_-]+)', drive_url)
42
- if match:
43
- return match.group(1)
44
- match = re.search(r'([a-zA-Z0-9_-]{33,})', drive_url)
45
- if match:
46
- return match.group(1)
47
- raise ValueError("Invalid Google Drive URL")
48
-
49
- def send_email(to_email: str, srt_file_path: str, transcription_time: float):
50
- smtp_server = "smtp.gmail.com"
51
- smtp_port = 587
52
- smtp_user = os.getenv("SMTP_USER")
53
- smtp_password = os.getenv("SMTP_PASSWORD")
54
-
55
- subject = "[kyobody - 자막생성] 작업이 완료되었습니다."
56
- body = f"[kyobody - 자막생성] 작업이 완료되었습니다. 총 소요 시간: {transcription_time:.2f} 초. SRT 파일을 첨부하여 전달드립니다."
57
-
58
- msg = MIMEMultipart()
59
- msg["From"] = smtp_user
60
- msg["To"] = to_email
61
- msg["Subject"] = subject
62
-
63
- msg.attach(MIMEText(body, "plain"))
64
-
65
- with open(srt_file_path, "r") as file:
66
- attachment = MIMEText(file.read())
67
- attachment.add_header("Content-Disposition", "attachment", filename=os.path.basename(srt_file_path))
68
- msg.attach(attachment)
69
-
70
- with smtplib.SMTP(smtp_server, smtp_port) as server:
71
- server.starttls()
72
- server.login(smtp_user, smtp_password)
73
- server.sendmail(smtp_user, to_email, msg.as_string())
74
-
75
- def transcribe_and_send_email(temp_input_file: str, srt_file_path: str, email: str, language: str):
76
- try:
77
- # Transcribe the video/audio file
78
- import time
79
- start_time = time.time()
80
- result = model.transcribe(temp_input_file, language=language)
81
- transcription_time = time.time() - start_time
82
-
83
- # Save the transcription to an SRT file
84
- with open(srt_file_path, "w") as srt_file:
85
- for i, segment in enumerate(result["segments"]):
86
- start = segment['start']
87
- end = segment['end']
88
- text = segment['text'][1:]
89
-
90
- start_time = f"{int(start // 3600):02}:{int((start % 3600) // 60):02}:{int(start % 60):02},{int((start * 1000) % 1000):03}"
91
- end_time = f"{int(end // 3600):02}:{int((end % 3600) // 60):02}:{int(end % 60):02},{int((end * 1000) % 1000):03}"
92
-
93
- srt_file.write(f"{i + 1}\n")
94
- srt_file.write(f"{start_time} --> {end_time}\n")
95
- srt_file.write(f"{text}\n\n")
96
-
97
- # Send the result via email
98
- send_email(email, srt_file_path, transcription_time)
99
-
100
- # Clean up the temporary files
101
- os.remove(temp_input_file)
102
- os.remove(srt_file_path)
103
-
104
- except Exception as e:
105
- raise e
106
-
107
- @app.post("/transcribe/")
108
- def transcribe_video(url: str, email: str, background_tasks: BackgroundTasks, language: str = "en"):
109
- try:
110
- # Extract file ID and download the file
111
- file_id = extract_file_id(url)
112
- download_url = f"https://drive.google.com/uc?id={file_id}"
113
- temp_input_file = f'/tmp/{uuid.uuid4()}.mp4'
114
- gdown.download(download_url, temp_input_file, quiet=False)
115
-
116
- # Define SRT file path
117
- srt_file_path = f'/tmp/{uuid.uuid4()}.srt'
118
-
119
- # Schedule the transcription and email sending in the background
120
- background_tasks.add_task(executor.submit, transcribe_and_send_email, temp_input_file, srt_file_path, email, language)
121
-
122
- # Respond to the client immediately
123
- return JSONResponse(status_code=202, content={"message": "Transcription started, you will receive an email when it's done."})
124
-
125
- except Exception as e:
126
- raise HTTPException(status_code=500, detail=str(e))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
env_backup DELETED
@@ -1,2 +0,0 @@
1
- SMTP_USER=kyowoon.lee1924@gmail.com
2
- SMTP_PASSWORD=<REDACTED>