Spaces:

Alexvatti
/

Automatic-File-Renaming

Sleeping

Alexvatti committed on May 11, 2025

Commit

880f459

verified ·

1 Parent(s): 9c564f1

Update main.py

Files changed (1) hide show

main.py CHANGED Viewed

@@ -8,7 +8,6 @@ import docx
 import shutil
 import os
 import io
-from transformers import pipeline, CLIPProcessor, CLIPModel
 from datetime import datetime
 import uvicorn
 # Hugging Face GPT or LLM model for content-based name generation
@@ -72,17 +71,6 @@ def extract_text_from_docx(docx_path):
         text += para.text
     return text
-# Function to extract text from images
-def extract_text_from_image(image_path):
-    image = Image.open(image_path)
-    return pytesseract.image_to_string(image)
-# Function to extract image features
-def extract_features_from_image(image_path):
-    image = Image.open(image_path)
-    inputs = clip_processor(images=image, return_tensors="pt")
-    outputs = clip_model.get_image_features(**inputs)
-    return outputs
 # Function to process files
 def process_files(files, industry):
@@ -101,8 +89,8 @@ def process_files(files, industry):
                 text = extract_text_from_pdf(file_path)
             elif filename.endswith('.docx'):
                 text = extract_text_from_docx(file_path)
-            elif filename.endswith(('png', 'jpg', 'jpeg')):
-                text = extract_text_from_image(file_path)
             # Generate name based on LLM and include timestamp for uniqueness
             content_name = generate_name_based_on_content(text,industry) if text else 'Untitled'

 import shutil
 import os
 import io
 from datetime import datetime
 import uvicorn
 # Hugging Face GPT or LLM model for content-based name generation
         text += para.text
     return text
 # Function to process files
 def process_files(files, industry):
                 text = extract_text_from_pdf(file_path)
             elif filename.endswith('.docx'):
                 text = extract_text_from_docx(file_path)
+            else:
+                print("Invalid")
             # Generate name based on LLM and include timestamp for uniqueness
             content_name = generate_name_based_on_content(text,industry) if text else 'Untitled'