Spaces:
Sleeping
Sleeping
Update api/syllabus_utils.py
Browse files- api/syllabus_utils.py +7 -7
api/syllabus_utils.py
CHANGED
|
@@ -15,8 +15,6 @@ from pypdf import PdfReader
|
|
| 15 |
from pptx import Presentation # python-pptx
|
| 16 |
|
| 17 |
from api.config import DEFAULT_COURSE_TOPICS
|
| 18 |
-
from api.some_module import parse_syllabus_docx, parse_syllabus_pdf, parse_pptx_slides
|
| 19 |
-
|
| 20 |
|
| 21 |
|
| 22 |
def parse_syllabus_docx(path: str) -> List[str]:
|
|
@@ -42,7 +40,8 @@ def parse_syllabus_pdf(path: str) -> List[str]:
|
|
| 42 |
- 返回前若干段作为“课程大纲 topics”
|
| 43 |
"""
|
| 44 |
reader = PdfReader(path)
|
| 45 |
-
pages_text = []
|
|
|
|
| 46 |
for page in reader.pages:
|
| 47 |
text = page.extract_text() or ""
|
| 48 |
if text.strip():
|
|
@@ -79,7 +78,7 @@ def parse_pptx_slides(path: str) -> List[str]:
|
|
| 79 |
def extract_course_topics_from_file(file_obj, doc_type: str) -> List[str]:
|
| 80 |
"""
|
| 81 |
根据上传文件和 doc_type 提取课程大纲 topics。
|
| 82 |
-
- 只有 doc_type == "
|
| 83 |
- 支持 .docx / .pdf / .pptx
|
| 84 |
"""
|
| 85 |
if file_obj is None:
|
|
@@ -89,9 +88,10 @@ def extract_course_topics_from_file(file_obj, doc_type: str) -> List[str]:
|
|
| 89 |
if doc_type_norm != "syllabus":
|
| 90 |
return DEFAULT_COURSE_TOPICS
|
| 91 |
|
| 92 |
-
#
|
| 93 |
-
file_path = getattr(file_obj, "name", None)
|
| 94 |
-
if not file_path:
|
|
|
|
| 95 |
return DEFAULT_COURSE_TOPICS
|
| 96 |
|
| 97 |
ext = os.path.splitext(file_path)[1].lower()
|
|
|
|
| 15 |
from pptx import Presentation # python-pptx
|
| 16 |
|
| 17 |
from api.config import DEFAULT_COURSE_TOPICS
|
|
|
|
|
|
|
| 18 |
|
| 19 |
|
| 20 |
def parse_syllabus_docx(path: str) -> List[str]:
|
|
|
|
| 40 |
- 返回前若干段作为“课程大纲 topics”
|
| 41 |
"""
|
| 42 |
reader = PdfReader(path)
|
| 43 |
+
pages_text: List[str] = []
|
| 44 |
+
|
| 45 |
for page in reader.pages:
|
| 46 |
text = page.extract_text() or ""
|
| 47 |
if text.strip():
|
|
|
|
| 78 |
def extract_course_topics_from_file(file_obj, doc_type: str) -> List[str]:
|
| 79 |
"""
|
| 80 |
根据上传文件和 doc_type 提取课程大纲 topics。
|
| 81 |
+
- 只有 doc_type == "syllabus" 时才尝试从文件解析;否则用默认大纲。
|
| 82 |
- 支持 .docx / .pdf / .pptx
|
| 83 |
"""
|
| 84 |
if file_obj is None:
|
|
|
|
| 88 |
if doc_type_norm != "syllabus":
|
| 89 |
return DEFAULT_COURSE_TOPICS
|
| 90 |
|
| 91 |
+
# 这里必须是“真实可读路径”,你的 server.py 会传 fo.name = /tmp/xxx
|
| 92 |
+
file_path = getattr(file_obj, "name", None)
|
| 93 |
+
if not file_path or not os.path.exists(file_path):
|
| 94 |
+
print(f"[Syllabus] file path missing or not found: {file_path!r}")
|
| 95 |
return DEFAULT_COURSE_TOPICS
|
| 96 |
|
| 97 |
ext = os.path.splitext(file_path)[1].lower()
|