Update app.py
Browse files
app.py
CHANGED
|
@@ -76,25 +76,98 @@ def chat_with_ai(user_input, chat_history):
|
|
| 76 |
def clear_history():
    """Return a cleared state: a new empty list and an empty string.

    Presumably the list resets the chat history and the string resets the
    input box -- confirm against the UI wiring that calls this.
    """
    empty_history = []
    empty_text = ""
    return empty_history, empty_text
|
| 78 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
def upload_file(file):
|
| 80 |
if file is None:
|
| 81 |
return "No file uploaded!"
|
| 82 |
|
| 83 |
if isinstance(file, list):
|
| 84 |
file = file[0]
|
| 85 |
-
|
| 86 |
if hasattr(file, 'name'):
|
| 87 |
file_name = file.name
|
|
|
|
| 88 |
elif isinstance(file, dict):
|
| 89 |
file_name = file.get("name", "uploaded_file")
|
|
|
|
| 90 |
else:
|
| 91 |
-
|
| 92 |
|
| 93 |
-
|
|
|
|
|
|
|
|
|
|
| 94 |
if not os.path.exists("new_file"):
|
| 95 |
os.makedirs("new_file")
|
| 96 |
|
| 97 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
file_path = os.path.join("new_file", file_name)
|
| 99 |
if hasattr(file, "read"):
|
| 100 |
content = file.read()
|
|
|
|
| 76 |
def clear_history():
    """Return a cleared state: a new empty list and an empty string.

    Presumably the list resets the chat history and the string resets the
    input box -- confirm against the UI wiring that calls this.
    """
    empty_history = []
    empty_text = ""
    return empty_history, empty_text
|
| 78 |
|
| 79 |
+
import os
|
| 80 |
+
import PyPDF2
|
| 81 |
+
import docx
|
| 82 |
+
import pandas as pd
|
| 83 |
+
|
| 84 |
+
def extract_text_from_file(file_path):
    """Extract text from a file, choosing a parser by its extension.

    Supports PDF, DOC/DOCX, TXT and XLS/XLSX. The extension is compared
    case-insensitively.

    Args:
        file_path: Path of the file to read.

    Returns:
        The extracted text as a string. Failures are never raised to the
        caller: a parsing error is reported as an "Error processing ..."
        message, and an unknown extension yields
        "Unsupported file type for text extraction.".
    """
    ext = os.path.splitext(file_path)[1].lower()
    text = ""

    if ext == ".pdf":
        try:
            with open(file_path, "rb") as f:
                pdf_reader = PyPDF2.PdfReader(f)
                # Pages must be read while the file handle is still open.
                page_texts = [page.extract_text() for page in pdf_reader.pages]
            # Pages that produced no text are skipped; each kept page is
            # terminated with a newline.
            text = "".join(page_text + "\n" for page_text in page_texts if page_text)
        except Exception as e:
            text = f"Error processing PDF: {e}"

    elif ext in (".doc", ".docx"):
        # NOTE(review): python-docx targets .docx; a legacy binary .doc will
        # most likely fail here and surface through the error message below.
        try:
            doc = docx.Document(file_path)
            text = "\n".join(para.text for para in doc.paragraphs)
        except Exception as e:
            text = f"Error processing Word document: {e}"

    elif ext == ".txt":
        try:
            # "utf-8-sig" decodes plain UTF-8 identically but also strips a
            # leading byte-order mark, so the text does not start with a
            # stray "\ufeff" character.
            with open(file_path, "r", encoding="utf-8-sig") as f:
                text = f.read()
        except Exception as e:
            text = f"Error processing TXT file: {e}"

    elif ext in (".xls", ".xlsx"):
        try:
            # Read the first sheet of the Excel file
            df = pd.read_excel(file_path)
            # Render the sheet as CSV text, without the index column
            text = df.to_csv(index=False)
        except Exception as e:
            text = f"Error processing Excel file: {e}"

    else:
        text = "Unsupported file type for text extraction."

    return text
|
| 130 |
+
|
| 131 |
def upload_file(file):
|
| 132 |
if file is None:
|
| 133 |
return "No file uploaded!"
|
| 134 |
|
| 135 |
if isinstance(file, list):
|
| 136 |
file = file[0]
|
| 137 |
+
|
| 138 |
if hasattr(file, 'name'):
|
| 139 |
file_name = file.name
|
| 140 |
+
file_data = file.read()
|
| 141 |
elif isinstance(file, dict):
|
| 142 |
file_name = file.get("name", "uploaded_file")
|
| 143 |
+
file_data = file.get("data")
|
| 144 |
else:
|
| 145 |
+
return "Uploaded file format not recognized."
|
| 146 |
|
| 147 |
+
if file_data is None:
|
| 148 |
+
return "Uploaded file data not found!"
|
| 149 |
+
|
| 150 |
+
|
| 151 |
if not os.path.exists("new_file"):
|
| 152 |
os.makedirs("new_file")
|
| 153 |
|
| 154 |
|
| 155 |
+
file_path = os.path.join("new_file", file_name)
|
| 156 |
+
try:
|
| 157 |
+
with open(file_path, "wb") as f:
|
| 158 |
+
f.write(file_data)
|
| 159 |
+
except Exception as e:
|
| 160 |
+
return f"Error saving file: {e}"
|
| 161 |
+
|
| 162 |
+
|
| 163 |
+
extracted_text = extract_text_from_file(file_path)
|
| 164 |
+
|
| 165 |
+
|
| 166 |
+
preview = extracted_text[:200] + "..." if len(extracted_text) > 200 else extracted_text
|
| 167 |
+
return f"File {file_name} uploaded and processed successfully!\nExtracted text preview:\n{preview}"
|
| 168 |
+
|
| 169 |
+
|
| 170 |
+
|
| 171 |
file_path = os.path.join("new_file", file_name)
|
| 172 |
if hasattr(file, "read"):
|
| 173 |
content = file.read()
|