Spaces:
Build error
Build error
Chia Woon Yap
committed on
Update app.py
Browse files
app.py
CHANGED
|
@@ -184,11 +184,39 @@ def extract_text_from_pptx(pptx_path):
|
|
| 184 |
return f"Error extracting text from PowerPoint: {str(e)}"
|
| 185 |
|
| 186 |
# Function to process documents safely
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 187 |
def process_document(file):
|
| 188 |
try:
|
|
|
|
|
|
|
|
|
|
| 189 |
file_extension = os.path.splitext(file.name)[-1].lower()
|
|
|
|
| 190 |
if file_extension in [".png", ".jpg", ".jpeg"]:
|
| 191 |
return "Error: Images cannot be processed for text extraction."
|
|
|
|
| 192 |
if file_extension == ".pdf":
|
| 193 |
content = extract_text_from_pdf(file.name)
|
| 194 |
elif file_extension == ".docx":
|
|
@@ -199,14 +227,18 @@ def process_document(file):
|
|
| 199 |
encoding = detect_encoding(file.name)
|
| 200 |
with open(file.name, "r", encoding=encoding, errors="replace") as f:
|
| 201 |
content = f.read()
|
|
|
|
| 202 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
|
| 203 |
documents = [Document(page_content=chunk) for chunk in text_splitter.split_text(content)]
|
| 204 |
vectorstore.add_documents(documents)
|
| 205 |
quiz = generate_quiz(content)
|
| 206 |
return f"Document processed successfully (File Type: {file_extension}). Quiz generated:\n{quiz}"
|
|
|
|
| 207 |
except Exception as e:
|
| 208 |
return f"Error processing document: {str(e)}"
|
| 209 |
|
|
|
|
|
|
|
| 210 |
# Function to handle speech-to-text conversion
|
| 211 |
def transcribe_audio(audio):
|
| 212 |
sr, y = audio
|
|
|
|
| 184 |
return f"Error extracting text from PowerPoint: {str(e)}"
|
| 185 |
|
| 186 |
# Function to process documents safely
|
| 187 |
+
#def process_document(file):
|
| 188 |
+
# try:
|
| 189 |
+
# file_extension = os.path.splitext(file.name)[-1].lower()
|
| 190 |
+
# if file_extension in [".png", ".jpg", ".jpeg"]:
|
| 191 |
+
# return "Error: Images cannot be processed for text extraction."
|
| 192 |
+
# if file_extension == ".pdf":
|
| 193 |
+
# content = extract_text_from_pdf(file.name)
|
| 194 |
+
# elif file_extension == ".docx":
|
| 195 |
+
# content = extract_text_from_docx(file.name)
|
| 196 |
+
# elif file_extension == ".pptx":
|
| 197 |
+
# content = extract_text_from_pptx(file.name)
|
| 198 |
+
# else:
|
| 199 |
+
# encoding = detect_encoding(file.name)
|
| 200 |
+
# with open(file.name, "r", encoding=encoding, errors="replace") as f:
|
| 201 |
+
# content = f.read()
|
| 202 |
+
# text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
|
| 203 |
+
# documents = [Document(page_content=chunk) for chunk in text_splitter.split_text(content)]
|
| 204 |
+
# vectorstore.add_documents(documents)
|
| 205 |
+
# quiz = generate_quiz(content)
|
| 206 |
+
# return f"Document processed successfully (File Type: {file_extension}). Quiz generated:\n{quiz}"
|
| 207 |
+
# except Exception as e:
|
| 208 |
+
# return f"Error processing document: {str(e)}"
|
| 209 |
+
|
| 210 |
def process_document(file):
|
| 211 |
try:
|
| 212 |
+
if not file or not hasattr(file, "name") or not isinstance(file.name, str):
|
| 213 |
+
return "Error: Invalid file uploaded."
|
| 214 |
+
|
| 215 |
file_extension = os.path.splitext(file.name)[-1].lower()
|
| 216 |
+
|
| 217 |
if file_extension in [".png", ".jpg", ".jpeg"]:
|
| 218 |
return "Error: Images cannot be processed for text extraction."
|
| 219 |
+
|
| 220 |
if file_extension == ".pdf":
|
| 221 |
content = extract_text_from_pdf(file.name)
|
| 222 |
elif file_extension == ".docx":
|
|
|
|
| 227 |
encoding = detect_encoding(file.name)
|
| 228 |
with open(file.name, "r", encoding=encoding, errors="replace") as f:
|
| 229 |
content = f.read()
|
| 230 |
+
|
| 231 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
|
| 232 |
documents = [Document(page_content=chunk) for chunk in text_splitter.split_text(content)]
|
| 233 |
vectorstore.add_documents(documents)
|
| 234 |
quiz = generate_quiz(content)
|
| 235 |
return f"Document processed successfully (File Type: {file_extension}). Quiz generated:\n{quiz}"
|
| 236 |
+
|
| 237 |
except Exception as e:
|
| 238 |
return f"Error processing document: {str(e)}"
|
| 239 |
|
| 240 |
+
|
| 241 |
+
|
| 242 |
# Function to handle speech-to-text conversion
|
| 243 |
def transcribe_audio(audio):
|
| 244 |
sr, y = audio
|