Spaces:

Lesterchia1
/

Test_TutorAI_We

Build error

App Files Files Community

Chia Woon Yap committed on Jun 24, 2025

Commit

48f37ba

verified ·

1 Parent(s): 8b93755

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -0

app.py CHANGED Viewed

@@ -184,11 +184,39 @@ def extract_text_from_pptx(pptx_path):
         return f"Error extracting text from PowerPoint: {str(e)}"
 # Function to process documents safely
 def process_document(file):
     try:
         file_extension = os.path.splitext(file.name)[-1].lower()
         if file_extension in [".png", ".jpg", ".jpeg"]:
             return "Error: Images cannot be processed for text extraction."
         if file_extension == ".pdf":
             content = extract_text_from_pdf(file.name)
         elif file_extension == ".docx":
@@ -199,14 +227,18 @@ def process_document(file):
             encoding = detect_encoding(file.name)
             with open(file.name, "r", encoding=encoding, errors="replace") as f:
                 content = f.read()
         text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
         documents = [Document(page_content=chunk) for chunk in text_splitter.split_text(content)]
         vectorstore.add_documents(documents)
         quiz = generate_quiz(content)
         return f"Document processed successfully (File Type: {file_extension}). Quiz generated:\n{quiz}"
     except Exception as e:
         return f"Error processing document: {str(e)}"
 # Function to handle speech-to-text conversion
 def transcribe_audio(audio):
     sr, y = audio

         return f"Error extracting text from PowerPoint: {str(e)}"
 # Function to process documents safely
+#def process_document(file):
+#    try:
+#        file_extension = os.path.splitext(file.name)[-1].lower()
+#        if file_extension in [".png", ".jpg", ".jpeg"]:
+#            return "Error: Images cannot be processed for text extraction."
+#        if file_extension == ".pdf":
+#            content = extract_text_from_pdf(file.name)
+#        elif file_extension == ".docx":
+#            content = extract_text_from_docx(file.name)
+#        elif file_extension == ".pptx":
+#            content = extract_text_from_pptx(file.name)
+#        else:
+#            encoding = detect_encoding(file.name)
+#            with open(file.name, "r", encoding=encoding, errors="replace") as f:
+#               content = f.read()
+#        text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
+#        documents = [Document(page_content=chunk) for chunk in text_splitter.split_text(content)]
+#        vectorstore.add_documents(documents)
+#        quiz = generate_quiz(content)
+#        return f"Document processed successfully (File Type: {file_extension}). Quiz generated:\n{quiz}"
+#    except Exception as e:
+#        return f"Error processing document: {str(e)}"
 def process_document(file):
     try:
+        if not file or not hasattr(file, "name") or not isinstance(file.name, str):
+            return "Error: Invalid file uploaded."
         file_extension = os.path.splitext(file.name)[-1].lower()
         if file_extension in [".png", ".jpg", ".jpeg"]:
             return "Error: Images cannot be processed for text extraction."
         if file_extension == ".pdf":
             content = extract_text_from_pdf(file.name)
         elif file_extension == ".docx":
             encoding = detect_encoding(file.name)
             with open(file.name, "r", encoding=encoding, errors="replace") as f:
                 content = f.read()
         text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
         documents = [Document(page_content=chunk) for chunk in text_splitter.split_text(content)]
         vectorstore.add_documents(documents)
         quiz = generate_quiz(content)
         return f"Document processed successfully (File Type: {file_extension}). Quiz generated:\n{quiz}"
     except Exception as e:
         return f"Error processing document: {str(e)}"
 # Function to handle speech-to-text conversion
 def transcribe_audio(audio):
     sr, y = audio