Alexvatti committed on
Commit
880f459
·
verified ·
1 Parent(s): 9c564f1

Update main.py: remove image OCR/CLIP extraction and print "Invalid" for unsupported file types

Browse files
Files changed (1) hide show
  1. main.py +2 -14
main.py CHANGED
@@ -8,7 +8,6 @@ import docx
8
  import shutil
9
  import os
10
  import io
11
- from transformers import pipeline, CLIPProcessor, CLIPModel
12
  from datetime import datetime
13
  import uvicorn
14
  # Hugging Face GPT or LLM model for content-based name generation
@@ -72,17 +71,6 @@ def extract_text_from_docx(docx_path):
72
  text += para.text
73
  return text
74
 
75
- # Function to extract text from images
76
- def extract_text_from_image(image_path):
77
- image = Image.open(image_path)
78
- return pytesseract.image_to_string(image)
79
-
80
- # Function to extract image features
81
- def extract_features_from_image(image_path):
82
- image = Image.open(image_path)
83
- inputs = clip_processor(images=image, return_tensors="pt")
84
- outputs = clip_model.get_image_features(**inputs)
85
- return outputs
86
 
87
  # Function to process files
88
  def process_files(files, industry):
@@ -101,8 +89,8 @@ def process_files(files, industry):
101
  text = extract_text_from_pdf(file_path)
102
  elif filename.endswith('.docx'):
103
  text = extract_text_from_docx(file_path)
104
- elif filename.endswith(('png', 'jpg', 'jpeg')):
105
- text = extract_text_from_image(file_path)
106
 
107
  # Generate name based on LLM and include timestamp for uniqueness
108
  content_name = generate_name_based_on_content(text,industry) if text else 'Untitled'
 
8
  import shutil
9
  import os
10
  import io
 
11
  from datetime import datetime
12
  import uvicorn
13
  # Hugging Face GPT or LLM model for content-based name generation
 
71
  text += para.text
72
  return text
73
 
 
 
 
 
 
 
 
 
 
 
 
74
 
75
  # Function to process files
76
  def process_files(files, industry):
 
89
  text = extract_text_from_pdf(file_path)
90
  elif filename.endswith('.docx'):
91
  text = extract_text_from_docx(file_path)
92
+ else:
93
+ print("Invalid")
94
 
95
  # Generate name based on LLM and include timestamp for uniqueness
96
  content_name = generate_name_based_on_content(text,industry) if text else 'Untitled'