KyosukeIchikawa committed on
Commit
bc0cd05
·
1 Parent(s): eab5dbb

Implement SessionManager for managing session data and directories

Browse files
tests/unit/test_session_manager.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Tests for SessionManager module."""
2
+
3
+ from yomitalk.utils.session_manager import SessionManager
4
+
5
+
6
def test_session_manager_initialization():
    """Verify that a fresh SessionManager exposes a non-empty, prefixed string ID."""
    manager = SessionManager()

    current_id = manager.session_id
    assert current_id is not None
    assert isinstance(current_id, str)
    assert len(current_id) > 0

    # Check the session ID format
    assert current_id.startswith("session_")
15
+
16
+
17
def test_session_dirs_creation(tmp_path, monkeypatch):
    """Test that SessionManager creates session-specific directories.

    The test runs with pytest's ``tmp_path`` as the working directory so that
    the directories SessionManager creates from its relative base paths land
    in a throwaway location instead of accumulating under the real ``data/``
    tree of the checkout.
    """
    monkeypatch.chdir(tmp_path)
    session_manager = SessionManager()

    # Temporary directory
    temp_dir = session_manager.get_temp_dir()
    assert temp_dir.exists()
    assert temp_dir.is_dir()
    assert session_manager.session_id in str(temp_dir)

    # Output directory
    output_dir = session_manager.get_output_dir()
    assert output_dir.exists()
    assert output_dir.is_dir()
    assert session_manager.session_id in str(output_dir)

    # Talks temporary directory
    talk_temp_dir = session_manager.get_talk_temp_dir()
    assert talk_temp_dir.exists()
    assert talk_temp_dir.is_dir()
    assert session_manager.session_id in str(talk_temp_dir)
    assert "talks" in str(talk_temp_dir)
39
+
40
+
41
def test_unique_session_ids():
    """Check that two managers created back to back receive distinct session IDs."""
    first, second = SessionManager(), SessionManager()
    assert first.session_id != second.session_id
yomitalk/app.py CHANGED
@@ -7,7 +7,6 @@ Builds the Paper Podcast Generator application using Gradio.
7
 
8
  import math
9
  import os
10
- import uuid
11
  from pathlib import Path
12
  from typing import Any, Dict, List, Optional
13
 
@@ -18,8 +17,9 @@ from yomitalk.components.file_uploader import FileUploader
18
  from yomitalk.components.text_processor import TextProcessor
19
  from yomitalk.prompt_manager import DocumentType, PodcastMode
20
  from yomitalk.utils.logger import logger
 
21
 
22
- # Check for temporary file directories
23
  os.makedirs("data/temp", exist_ok=True)
24
  os.makedirs("data/output", exist_ok=True)
25
 
@@ -39,9 +39,19 @@ class PaperPodcastApp:
39
 
40
  Creates instances of FileUploader, TextProcessor, and AudioGenerator.
41
  """
42
- self.file_uploader = FileUploader()
 
 
 
 
 
 
 
43
  self.text_processor = TextProcessor()
44
- self.audio_generator = AudioGenerator()
 
 
 
45
 
46
  # Check if VOICEVOX Core is available
47
  self.voicevox_core_available = (
@@ -136,6 +146,7 @@ class PaperPodcastApp:
136
  Process file uploads.
137
 
138
  Properly handles file objects from Gradio's file upload component.
 
139
 
140
  Args:
141
  file_obj: Gradio's file object
@@ -146,45 +157,8 @@ class PaperPodcastApp:
146
  if file_obj is None:
147
  return None
148
 
149
- try:
150
- # Temporary directory path
151
- temp_dir = Path("data/temp")
152
- temp_dir.mkdir(parents=True, exist_ok=True)
153
-
154
- # Get filename
155
- if isinstance(file_obj, list) and len(file_obj) > 0:
156
- file_obj = file_obj[0] # Get first element if it's a list
157
-
158
- # セキュリティのため、オリジナルファイル名は使用せず、一意のIDを生成
159
- # ただし、元のファイル拡張子は保持する
160
- original_extension = ".txt" # デフォルト拡張子
161
- if hasattr(file_obj, "name"):
162
- # 元のファイルの拡張子を取得
163
- original_extension = os.path.splitext(Path(file_obj.name).name)[1]
164
- # 拡張子がない場合はデフォルト値を使用
165
- if not original_extension:
166
- original_extension = ".txt"
167
-
168
- # 安全なファイル名を生成(UUIDと元の拡張子を組み合わせる)
169
- filename = f"uploaded_{uuid.uuid4().hex}{original_extension}"
170
-
171
- # Create temporary file path
172
- temp_path = temp_dir / filename
173
-
174
- # Get and save file data
175
- if hasattr(file_obj, "read") and callable(file_obj.read):
176
- with open(temp_path, "wb") as f:
177
- f.write(file_obj.read())
178
- elif hasattr(file_obj, "name"):
179
- with open(temp_path, "wb") as f:
180
- with open(file_obj.name, "rb") as source:
181
- f.write(source.read())
182
-
183
- return str(temp_path)
184
-
185
- except Exception as e:
186
- logger.error(f"File processing error: {e}")
187
- return None
188
 
189
  def extract_file_text(self, file_obj) -> str:
190
  """
 
7
 
8
  import math
9
  import os
 
10
  from pathlib import Path
11
  from typing import Any, Dict, List, Optional
12
 
 
17
  from yomitalk.components.text_processor import TextProcessor
18
  from yomitalk.prompt_manager import DocumentType, PodcastMode
19
  from yomitalk.utils.logger import logger
20
+ from yomitalk.utils.session_manager import SessionManager
21
 
22
+ # Check for base directories
23
  os.makedirs("data/temp", exist_ok=True)
24
  os.makedirs("data/output", exist_ok=True)
25
 
 
39
 
40
  Creates instances of FileUploader, TextProcessor, and AudioGenerator.
41
  """
42
+ # セッション管理の初期化
43
+ self.session_manager = SessionManager()
44
+ logger.info(
45
+ f"Initializing app with session ID: {self.session_manager.get_session_id()}"
46
+ )
47
+
48
+ # 各コンポーネントにセッション固有のディレクトリを渡す
49
+ self.file_uploader = FileUploader(temp_dir=self.session_manager.get_temp_dir())
50
  self.text_processor = TextProcessor()
51
+ self.audio_generator = AudioGenerator(
52
+ session_output_dir=self.session_manager.get_output_dir(),
53
+ session_temp_dir=self.session_manager.get_talk_temp_dir(),
54
+ )
55
 
56
  # Check if VOICEVOX Core is available
57
  self.voicevox_core_available = (
 
146
  Process file uploads.
147
 
148
  Properly handles file objects from Gradio's file upload component.
149
+ This is now a wrapper around FileUploader's handle_file_upload method.
150
 
151
  Args:
152
  file_obj: Gradio's file object
 
157
  if file_obj is None:
158
  return None
159
 
160
+ # FileUploaderのhandle_file_uploadメソッドを使用
161
+ return self.file_uploader.handle_file_upload(file_obj)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
162
 
163
  def extract_file_text(self, file_obj) -> str:
164
  """
yomitalk/components/audio_generator.py CHANGED
@@ -123,10 +123,31 @@ class AudioGenerator:
123
 
124
  CONVERSION_OVERRIDE = {"this": "ディス", "to": "トゥ", "a": "ア"}
125
 
126
- def __init__(self) -> None:
127
- """Initialize AudioGenerator."""
128
- self.output_dir = Path("data/output")
129
- self.temp_dir = Path("data/temp/talks")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
 
131
  # VOICEVOX Core
132
  self.core_initialized = False
 
123
 
124
  CONVERSION_OVERRIDE = {"this": "ディス", "to": "トゥ", "a": "ア"}
125
 
126
+ def __init__(
127
+ self,
128
+ session_output_dir: Optional[Path] = None,
129
+ session_temp_dir: Optional[Path] = None,
130
+ ) -> None:
131
+ """
132
+ Initialize AudioGenerator.
133
+
134
+ Args:
135
+ session_output_dir (Optional[Path]): Session-specific output directory.
136
+ If not provided, defaults to "data/output"
137
+ session_temp_dir (Optional[Path]): Session-specific temporary directory.
138
+ If not provided, defaults to "data/temp/talks"
139
+ """
140
+ # Use session-specific directories if provided
141
+ self.output_dir = (
142
+ session_output_dir if session_output_dir else Path("data/output")
143
+ )
144
+ self.temp_dir = (
145
+ session_temp_dir if session_temp_dir else Path("data/temp/talks")
146
+ )
147
+
148
+ # Make sure directories exist
149
+ self.output_dir.mkdir(parents=True, exist_ok=True)
150
+ self.temp_dir.mkdir(parents=True, exist_ok=True)
151
 
152
  # VOICEVOX Core
153
  self.core_initialized = False
yomitalk/components/file_uploader.py CHANGED
@@ -4,7 +4,6 @@ Provides text extraction functionality for the Paper Podcast Generator applicati
4
  """
5
 
6
  import os
7
- from pathlib import Path
8
  from typing import List
9
 
10
  from yomitalk.utils.logger import logger
@@ -14,10 +13,14 @@ from yomitalk.utils.pdf_extractor import PDFExtractor
14
  class FileUploader:
15
  """Class for uploading files and extracting text."""
16
 
17
- def __init__(self) -> None:
18
- """Initialize FileUploader."""
19
- self.temp_dir = Path("data/temp")
20
- self.temp_dir.mkdir(parents=True, exist_ok=True)
 
 
 
 
21
  self.supported_text_extensions = [".txt", ".md", ".text", ".tmp"]
22
  self.supported_pdf_extensions = [".pdf"]
23
  self.supported_extensions = (
@@ -25,6 +28,12 @@ class FileUploader:
25
  )
26
  self.pdf_extractor = PDFExtractor()
27
 
 
 
 
 
 
 
28
  def extract_text_from_path(self, file_path: str) -> str:
29
  """
30
  Extract text from a file based on its extension.
@@ -76,6 +85,61 @@ class FileUploader:
76
  logger.error(f"Text file reading error: {e}")
77
  return f"Text file reading failed: {str(e)}"
78
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  def get_supported_extensions(self) -> List[str]:
80
  """
81
  Get list of supported file extensions.
 
4
  """
5
 
6
  import os
 
7
  from typing import List
8
 
9
  from yomitalk.utils.logger import logger
 
13
  class FileUploader:
14
  """Class for uploading files and extracting text."""
15
 
16
+ def __init__(self, temp_dir=None) -> None:
17
+ """
18
+ Initialize FileUploader.
19
+
20
+ Args:
21
+ temp_dir (Optional[Path]): Session-specific temporary directory path.
22
+ If not provided, defaults to "data/temp"
23
+ """
24
  self.supported_text_extensions = [".txt", ".md", ".text", ".tmp"]
25
  self.supported_pdf_extensions = [".pdf"]
26
  self.supported_extensions = (
 
28
  )
29
  self.pdf_extractor = PDFExtractor()
30
 
31
+ # Set temporary directory
32
+ from pathlib import Path
33
+
34
+ self.temp_dir = Path(temp_dir) if temp_dir else Path("data/temp")
35
+ self.temp_dir.mkdir(parents=True, exist_ok=True)
36
+
37
  def extract_text_from_path(self, file_path: str) -> str:
38
  """
39
  Extract text from a file based on its extension.
 
85
  logger.error(f"Text file reading error: {e}")
86
  return f"Text file reading failed: {str(e)}"
87
 
88
def handle_file_upload(self, file_obj):
    """
    Process file uploads.

    Copies an uploaded file into ``self.temp_dir`` under a freshly generated
    name and returns the path of that copy.

    Args:
        file_obj: The uploaded file. May be ``None``, a list (only the first
            element is processed), an object with a callable ``read()``
            whose result is written out, or an object whose ``name``
            attribute is the path of the file to copy.

    Returns:
        Optional[str]: Path to the temporary file, or ``None`` when nothing
        was uploaded (``None`` or an empty list), when the object offers
        neither ``read()`` nor ``name``, or when saving fails.
    """
    import os
    import shutil
    import uuid
    from pathlib import Path

    # A list means multiple files were handed over; only the first is used.
    # An empty list is treated exactly like "nothing uploaded".
    if isinstance(file_obj, list):
        file_obj = file_obj[0] if file_obj else None
    if file_obj is None:
        return None

    try:
        source_name = getattr(file_obj, "name", None)
        readable = callable(getattr(file_obj, "read", None))

        # Without read() or a source path nothing can be written; returning a
        # path here would point at a file that does not exist.
        if not readable and source_name is None:
            logger.error(
                f"File processing error: unsupported upload object {type(file_obj).__name__}"
            )
            return None

        # For security the original file name is never reused; only its
        # extension is kept, with ".txt" as the fallback.
        extension = ".txt"
        if source_name is not None:
            extension = os.path.splitext(Path(source_name).name)[1] or ".txt"

        # Safe file name: random UUID plus the original extension, placed in
        # this uploader's temporary directory.
        temp_path = self.temp_dir / f"uploaded_{uuid.uuid4().hex}{extension}"

        with open(temp_path, "wb") as target:
            if readable:
                target.write(file_obj.read())
            else:
                # Stream from the source path instead of loading it whole.
                with open(source_name, "rb") as source:
                    shutil.copyfileobj(source, target)

        return str(temp_path)

    except Exception as e:
        logger.error(f"File processing error: {e}")
        return None
142
+
143
  def get_supported_extensions(self) -> List[str]:
144
  """
145
  Get list of supported file extensions.
yomitalk/utils/session_manager.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Session Manager Module.
2
+
3
+ This module provides functionality for managing Hugging Face Space sessions.
4
+ """
5
+
6
+ import time
7
+ import uuid
8
+ from pathlib import Path
9
+
10
+ from yomitalk.utils.logger import logger
11
+
12
+
13
class SessionManager:
    """Class for managing session data across Hugging Face Space sessions.

    Each instance owns one generated session ID and hands out directories
    named after that ID beneath a temporary base directory and an output
    base directory.
    """

    def __init__(
        self,
        base_temp_dir: Path = Path("data/temp"),
        base_output_dir: Path = Path("data/output"),
    ) -> None:
        """
        Initialize SessionManager.

        Args:
            base_temp_dir (Path): Parent directory under which the session's
                temporary directory is created. Defaults to "data/temp".
            base_output_dir (Path): Parent directory under which the session's
                output directory is created. Defaults to "data/output".
        """
        self.session_id = self._generate_session_id()
        # Path() also accepts plain strings, so callers may pass either form.
        self.base_temp_dir = Path(base_temp_dir)
        self.base_output_dir = Path(base_output_dir)
        logger.info(f"Session initialized with ID: {self.session_id}")

    def _generate_session_id(self) -> str:
        """
        Generate a unique session ID.

        The ID has the form ``session_<timestamp>_<random>``, where the
        timestamp is the current time in whole seconds and the random part is
        the first 8 hex characters of a UUID4.

        Returns:
            str: A unique session ID
        """
        timestamp = int(time.time())
        random_id = uuid.uuid4().hex[:8]
        return f"session_{timestamp}_{random_id}"

    def get_session_id(self) -> str:
        """
        Get the current session ID.

        Returns:
            str: The current session ID
        """
        return self.session_id

    def get_temp_dir(self) -> Path:
        """
        Get the temporary directory for the current session.

        The directory is created on demand if it does not exist yet.

        Returns:
            Path: Path to the session's temporary directory
        """
        session_temp_dir = self.base_temp_dir / self.session_id
        session_temp_dir.mkdir(parents=True, exist_ok=True)
        return session_temp_dir

    def get_output_dir(self) -> Path:
        """
        Get the output directory for the current session.

        The directory is created on demand if it does not exist yet.

        Returns:
            Path: Path to the session's output directory
        """
        session_output_dir = self.base_output_dir / self.session_id
        session_output_dir.mkdir(parents=True, exist_ok=True)
        return session_output_dir

    def get_talk_temp_dir(self) -> Path:
        """
        Get the talks temporary directory for the current session.

        This is the "talks" subdirectory of the session's temporary
        directory, created on demand if it does not exist yet.

        Returns:
            Path: Path to the session's talks temporary directory
        """
        talk_temp_dir = self.get_temp_dir() / "talks"
        talk_temp_dir.mkdir(parents=True, exist_ok=True)
        return talk_temp_dir