tbdavid2019 committed on
Commit
83ac3a4
·
1 Parent(s): 4ea223c
Files changed (3) hide show
  1. app.py +16 -21
  2. pyproject.toml +0 -23
  3. requirements.txt +3 -2
app.py CHANGED
@@ -3,6 +3,7 @@ import glob
3
  import io
4
  import os
5
  import time
 
6
  from pathlib import Path
7
  from typing import List, Literal
8
  import gradio as gr
@@ -10,7 +11,12 @@ import requests
10
  from dotenv import load_dotenv
11
  import pymupdf
12
  from bs4 import BeautifulSoup
13
- from epub2txt import epub2txt # 新增 epub2txt 導入
 
 
 
 
 
14
 
15
  load_dotenv()
16
 
@@ -100,12 +106,6 @@ INSTRUCTION_TEMPLATES = {
100
  """,
101
  "dialog": """Write a very long, engaging, informative script here, based on the key points and creative ideas you came up with during the brainstorming session. Use a conversational tone and include any necessary context or explanations to make the content accessible to the students.
102
  Include clear definitions and terms, and examples.
103
- DoAllocator does not have the ability to access this resource (warnings) - you'll need to ignore them.
104
- There is one warning we have left in place - this is because we want to avoid a situation where a warning suppresses an error (and thus is highlighted in red instead of yellow). We'll keep this here until we figure out how to suppress this particular warning without hiding potential errors.
105
-
106
- To suppress all warnings (not recommended), you can uncomment the following line:
107
- # warnings.filterwarnings('ignore')
108
- """
109
  Do not include any bracketed placeholders like [Host] or [Guest]. Design your output to be read aloud -- it will be directly converted into audio.
110
  There is only one speaker, you, the professor. Stay on topic and maintaining an engaging flow. Aim to use your full output capacity to create the longest lecture you can, while still communicating the key information from the input text in an engaging way.
111
  At the end of the lecture, naturally summarize the main insights and takeaways from the lecture. This should flow organically from the conversation, reiterating the key points in a casual, conversational manner.
@@ -271,8 +271,11 @@ def validate_and_generate_script(
271
  combined_text += f.read() + "\n\n"
272
 
273
  elif filename.endswith(".epub"):
274
- text = epub2txt(file.name) # 使用 epub2txt 提取文字
275
- combined_text += text + "\n\n"
 
 
 
276
  else:
277
  print(f"Skipping unsupported file format: {filename}")
278
 
@@ -301,15 +304,6 @@ with gr.Blocks(title="Script Generator", css="""
301
  #generate-btn {
302
  background-color: #FF9800 !important;
303
  color: white !important;
304
- border-radius: 8px;
305
- font-weight: bold;
306
- padding: 10px 18px;
307
- box-shadow: 2px 4px 8px rgba(0,0,0,0.2);
308
- transition: background-color 0.3s ease;
309
- }
310
-
311
- #generate-btn:hover {
312
- background-color: #d32f2f !important;
313
  }
314
  #header { text-align: center; margin-bottom: 20px; }
315
  .error { color: red; }
@@ -328,8 +322,8 @@ with gr.Blocks(title="Script Generator", css="""
328
 
329
  api_base = gr.Textbox(
330
  label="API Base URL",
331
- placeholder="https://gemini.david888.com/v1",
332
- value="https://gemini.david888.com/v1"
333
  )
334
 
335
  api_key = gr.Textbox(
@@ -393,10 +387,11 @@ with gr.Blocks(title="Script Generator", css="""
393
  lines=5
394
  )
395
 
 
396
 
397
  with gr.Column(scale=1):
398
  # 輸出區
399
- generate_button = gr.Button("生成腳本 | Generate Script", elem_id="generate-btn")
400
 
401
  output_text = gr.Textbox(
402
  label="生成的腳本 | Generated Script",
 
3
  import io
4
  import os
5
  import time
6
+ import warnings
7
  from pathlib import Path
8
  from typing import List, Literal
9
  import gradio as gr
 
11
  from dotenv import load_dotenv
12
  import pymupdf
13
  from bs4 import BeautifulSoup
14
+ import ebooklib
15
+ from ebooklib import epub
16
+
17
+ # 忽略 ebooklib 的警告
18
+ warnings.filterwarnings('ignore', category=UserWarning, module='ebooklib.epub')
19
+ warnings.filterwarnings('ignore', category=FutureWarning, module='ebooklib.epub')
20
 
21
  load_dotenv()
22
 
 
106
  """,
107
  "dialog": """Write a very long, engaging, informative script here, based on the key points and creative ideas you came up with during the brainstorming session. Use a conversational tone and include any necessary context or explanations to make the content accessible to the students.
108
  Include clear definitions and terms, and examples.
 
 
 
 
 
 
109
  Do not include any bracketed placeholders like [Host] or [Guest]. Design your output to be read aloud -- it will be directly converted into audio.
110
  There is only one speaker, you, the professor. Stay on topic and maintaining an engaging flow. Aim to use your full output capacity to create the longest lecture you can, while still communicating the key information from the input text in an engaging way.
111
  At the end of the lecture, naturally summarize the main insights and takeaways from the lecture. This should flow organically from the conversation, reiterating the key points in a casual, conversational manner.
 
271
  combined_text += f.read() + "\n\n"
272
 
273
  elif filename.endswith(".epub"):
274
+ book = epub.read_epub(file.name)
275
+ for item in book.get_items():
276
+ if item.get_type() == ebooklib.ITEM_DOCUMENT:
277
+ soup = BeautifulSoup(item.get_body_content(), 'html.parser')
278
+ combined_text += soup.get_text() + "\n\n"
279
  else:
280
  print(f"Skipping unsupported file format: {filename}")
281
 
 
304
  #generate-btn {
305
  background-color: #FF9800 !important;
306
  color: white !important;
 
 
 
 
 
 
 
 
 
307
  }
308
  #header { text-align: center; margin-bottom: 20px; }
309
  .error { color: red; }
 
322
 
323
  api_base = gr.Textbox(
324
  label="API Base URL",
325
+ placeholder="https://gemini.joinit.tw/v1",
326
+ value="https://gemini.joinit.tw/v1"
327
  )
328
 
329
  api_key = gr.Textbox(
 
387
  lines=5
388
  )
389
 
390
+
391
 
392
  with gr.Column(scale=1):
393
  # 輸出區
394
+ generate_button = gr.Button("生成腳本 | Generate Script", elem_id="generate-btn")
395
 
396
  output_text = gr.Textbox(
397
  label="生成的腳本 | Generated Script",
pyproject.toml DELETED
@@ -1,23 +0,0 @@
1
- [build-system]
2
- requires = ["setuptools>=61.0", "wheel"]
3
- build-backend = "setuptools.build_meta"
4
-
5
- [project]
6
- name = "pdf2podcast"
7
- version = "0.1.0"
8
- dependencies = [
9
- "gradio==5.24.0",
10
- "pandas==2.2.3",
11
- "openai==1.72.0",
12
- "loguru==0.7.3",
13
- "promptic==1.2.0",
14
- "tenacity==9.1.2",
15
- "PyMuPDF==1.25.5",
16
- "epub2txt==0.1.6",
17
- "bs4==0.0.2",
18
- "beautifulsoup4==4.13.3",
19
- "lxml==5.3.2",
20
- ]
21
-
22
- [project.scripts]
23
- pdf2podcast = "app:main"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -1,9 +1,10 @@
1
  gradio
 
2
  pandas
3
  openai
4
  loguru
5
  promptic
6
  tenacity
7
  PyMuPDF
8
- epub2txt
9
- bs4
 
1
  gradio
2
+ ebooklib==0.18
3
  pandas
4
  openai
5
  loguru
6
  promptic
7
  tenacity
8
  PyMuPDF
9
+ bs4
10
+ lxml