Spaces:
Sleeping
Sleeping
Update main.py
Browse files
main.py
CHANGED
|
@@ -19,7 +19,7 @@ app = FastAPI()
|
|
| 19 |
|
| 20 |
# Set up upload folder and allowed extensions
|
| 21 |
UPLOAD_FOLDER = 'uploads'
|
| 22 |
-
ALLOWED_EXTENSIONS = {'pdf', 'docx', '
|
| 23 |
MAX_CONTENT_LENGTH = 16 * 1024 * 1024 # 16 MB
|
| 24 |
|
| 25 |
if not os.path.exists(UPLOAD_FOLDER):
|
|
@@ -40,12 +40,10 @@ llm = ChatOpenAI(
|
|
| 40 |
)
|
| 41 |
|
| 42 |
# Load the CLIP model for image feature extraction
|
| 43 |
-
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32",from_tf=True)
|
| 44 |
-
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
|
| 45 |
|
| 46 |
# Function to generate a more appropriate name based on content
|
| 47 |
-
def generate_name_based_on_content(text):
|
| 48 |
-
prompt = f"Generate a meaningful file name for the following content: {text[:200]}" # Truncate text to first 200 characters
|
| 49 |
response = llm(prompt) # Get the model's response
|
| 50 |
|
| 51 |
# Extract the generated file name and clean it
|
|
@@ -107,7 +105,7 @@ def process_files(files, industry):
|
|
| 107 |
text = extract_text_from_image(file_path)
|
| 108 |
|
| 109 |
# Generate name based on LLM and include timestamp for uniqueness
|
| 110 |
-
content_name = generate_name_based_on_content(text) if text else 'Untitled'
|
| 111 |
directory_name = f"{industry}_{content_name}_{timestamp}"
|
| 112 |
new_dir = os.path.join(UPLOAD_FOLDER, directory_name)
|
| 113 |
if not os.path.exists(new_dir):
|
|
|
|
| 19 |
|
| 20 |
# Set up upload folder and allowed extensions
|
| 21 |
UPLOAD_FOLDER = 'uploads'
|
| 22 |
+
ALLOWED_EXTENSIONS = {'pdf', 'docx', 'txt'}
|
| 23 |
MAX_CONTENT_LENGTH = 16 * 1024 * 1024 # 16 MB
|
| 24 |
|
| 25 |
if not os.path.exists(UPLOAD_FOLDER):
|
|
|
|
| 40 |
)
|
| 41 |
|
| 42 |
# Load the CLIP model for image feature extraction
|
|
|
|
|
|
|
| 43 |
|
| 44 |
# Function to generate a more appropriate name based on content
|
| 45 |
+
def generate_name_based_on_content(text,industry):
|
| 46 |
+
prompt = f"Generate a meaningful file name for the following content: {text[:200]} based {industry}" # Truncate text to first 200 characters
|
| 47 |
response = llm(prompt) # Get the model's response
|
| 48 |
|
| 49 |
# Extract the generated file name and clean it
|
|
|
|
| 105 |
text = extract_text_from_image(file_path)
|
| 106 |
|
| 107 |
# Generate name based on LLM and include timestamp for uniqueness
|
| 108 |
+
content_name = generate_name_based_on_content(text,industry) if text else 'Untitled'
|
| 109 |
directory_name = f"{industry}_{content_name}_{timestamp}"
|
| 110 |
new_dir = os.path.join(UPLOAD_FOLDER, directory_name)
|
| 111 |
if not os.path.exists(new_dir):
|