Spaces:
Sleeping
Sleeping
Update main.py
Browse files
main.py
CHANGED
|
@@ -8,7 +8,6 @@ import docx
|
|
| 8 |
import shutil
|
| 9 |
import os
|
| 10 |
import io
|
| 11 |
-
from transformers import pipeline, CLIPProcessor, CLIPModel
|
| 12 |
from datetime import datetime
|
| 13 |
import uvicorn
|
| 14 |
# Hugging Face GPT or LLM model for content-based name generation
|
|
@@ -72,17 +71,6 @@ def extract_text_from_docx(docx_path):
|
|
| 72 |
text += para.text
|
| 73 |
return text
|
| 74 |
|
| 75 |
-
# Function to extract text from images
|
| 76 |
-
def extract_text_from_image(image_path):
|
| 77 |
-
image = Image.open(image_path)
|
| 78 |
-
return pytesseract.image_to_string(image)
|
| 79 |
-
|
| 80 |
-
# Function to extract image features
|
| 81 |
-
def extract_features_from_image(image_path):
|
| 82 |
-
image = Image.open(image_path)
|
| 83 |
-
inputs = clip_processor(images=image, return_tensors="pt")
|
| 84 |
-
outputs = clip_model.get_image_features(**inputs)
|
| 85 |
-
return outputs
|
| 86 |
|
| 87 |
# Function to process files
|
| 88 |
def process_files(files, industry):
|
|
@@ -101,8 +89,8 @@ def process_files(files, industry):
|
|
| 101 |
text = extract_text_from_pdf(file_path)
|
| 102 |
elif filename.endswith('.docx'):
|
| 103 |
text = extract_text_from_docx(file_path)
|
| 104 |
-
|
| 105 |
-
|
| 106 |
|
| 107 |
# Generate name based on LLM and include timestamp for uniqueness
|
| 108 |
content_name = generate_name_based_on_content(text,industry) if text else 'Untitled'
|
|
|
|
| 8 |
import shutil
|
| 9 |
import os
|
| 10 |
import io
|
|
|
|
| 11 |
from datetime import datetime
|
| 12 |
import uvicorn
|
| 13 |
# Hugging Face GPT or LLM model for content-based name generation
|
|
|
|
| 71 |
text += para.text
|
| 72 |
return text
|
| 73 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
|
| 75 |
# Function to process files
|
| 76 |
def process_files(files, industry):
|
|
|
|
| 89 |
text = extract_text_from_pdf(file_path)
|
| 90 |
elif filename.endswith('.docx'):
|
| 91 |
text = extract_text_from_docx(file_path)
|
| 92 |
+
else:
|
| 93 |
+
print("Invalid")
|
| 94 |
|
| 95 |
# Generate name based on LLM and include timestamp for uniqueness
|
| 96 |
content_name = generate_name_based_on_content(text,industry) if text else 'Untitled'
|