Spaces:
Running
Running
Update main.py: add a maximum upload size check
Browse files
main.py
CHANGED
|
@@ -71,6 +71,7 @@ app.add_middleware(
|
|
| 71 |
allow_methods=["*"],
|
| 72 |
allow_headers=["*"],
|
| 73 |
)
|
|
|
|
| 74 |
|
| 75 |
# Serve files from the local 'static' directory under the /static URL prefix; the mount is registered with the name 'static'.
app.mount("/static",StaticFiles(directory='static'),'static')
|
| 76 |
# Jinja2 template loader rooted at the local 'templates' directory.
templates = Jinja2Templates(directory='templates')
|
|
@@ -108,19 +109,24 @@ def interpret(file_img:UploadFile=File(...)):
|
|
| 108 |
return JSONResponse(content={"caption": caption[0]['generated_text']},status_code=200)
|
| 109 |
|
| 110 |
@app.post("/summerize")
|
| 111 |
-
def summerzation(file:UploadFile=File(...)):
|
| 112 |
try:
|
| 113 |
extension = file.filename.split(".")[-1]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
if extension == "pdf":
|
| 115 |
-
text = get_text_from_PDF(
|
| 116 |
elif extension == "docx":
|
| 117 |
-
text = get_text_from_DOC(
|
| 118 |
elif extension == "pptx":
|
| 119 |
-
text = get_text_from_PPT(
|
| 120 |
elif extension == "xlsx":
|
| 121 |
-
text = get_text_from_EXCEL(
|
| 122 |
-
|
| 123 |
-
return JSONResponse(content={"error": "Unsupported file type"},status_code=400)
|
| 124 |
|
| 125 |
if not text.strip():
|
| 126 |
return JSONResponse(content={'error':'File is emplty'},status_code=400)
|
|
@@ -146,7 +152,7 @@ def plot(user_need:str,file:UploadFile=File(...)):
|
|
| 146 |
if extension not in Supported_extensions:
|
| 147 |
return JSONResponse(content={"error": "Unsupported file type"},status_code=400)
|
| 148 |
|
| 149 |
-
df = pd.read_excel(io=file.file)
|
| 150 |
|
| 151 |
message = f"""
|
| 152 |
You are a helpful assistant that helps users write Python code.
|
|
@@ -215,8 +221,8 @@ def get_text_from_PDF(file_content):
|
|
| 215 |
text += page.get_text()
|
| 216 |
return text
|
| 217 |
|
| 218 |
-
def get_text_from_PPT(
|
| 219 |
-
prs = Presentation(
|
| 220 |
text = ""
|
| 221 |
for slide in prs.slides:
|
| 222 |
for shape in slide.shapes:
|
|
@@ -225,14 +231,14 @@ def get_text_from_PPT(file):
|
|
| 225 |
return text
|
| 226 |
|
| 227 |
|
| 228 |
-
def get_text_from_DOC(
|
| 229 |
-
doc = Document(
|
| 230 |
text = ""
|
| 231 |
for paragraph in doc.paragraphs:
|
| 232 |
text += paragraph.text
|
| 233 |
return text
|
| 234 |
|
| 235 |
def get_text_from_EXCEL(file):
    """Render an Excel workbook as plain text.

    Args:
        file: The workbook source, handed to ``pd.read_excel`` unchanged
            as its ``io`` argument.

    Returns:
        The string form of the DataFrame that pandas reads from ``file``.
    """
    return pd.read_excel(io=file).to_string()
|
|
|
|
| 71 |
allow_methods=["*"],
|
| 72 |
allow_headers=["*"],
|
| 73 |
)
|
| 74 |
+
# Upload size limit in bytes (1 * 1024 * 1024 = 1 MiB); the /summerize handler compares len(file_bytes) against it.
MAX_SIZE= 1 * 1024 *1024
|
| 75 |
|
| 76 |
# Serve files from the local 'static' directory under the /static URL prefix; the mount is registered with the name 'static'.
app.mount("/static",StaticFiles(directory='static'),'static')
|
| 77 |
# Jinja2 template loader rooted at the local 'templates' directory.
templates = Jinja2Templates(directory='templates')
|
|
|
|
| 109 |
return JSONResponse(content={"caption": caption[0]['generated_text']},status_code=200)
|
| 110 |
|
| 111 |
@app.post("/summerize")
|
| 112 |
+
async def summerzation(file:UploadFile=File(...)):
|
| 113 |
try:
|
| 114 |
extension = file.filename.split(".")[-1]
|
| 115 |
+
supported_ext=["pdf","xlxs","docx","ppt"]
|
| 116 |
+
if extension not in supported_ext :
|
| 117 |
+
return JSONResponse(content={"error": "Unsupported file type"},status_code=400)
|
| 118 |
+
file_bytes = await file.file.read()
|
| 119 |
+
if len(file_bytes) > MAX_SIZE :
|
| 120 |
+
return JSONResponse(content={"error": "too large file "},status_code=400)
|
| 121 |
if extension == "pdf":
|
| 122 |
+
text = get_text_from_PDF(file_bytes)
|
| 123 |
elif extension == "docx":
|
| 124 |
+
text = get_text_from_DOC(file_bytes)
|
| 125 |
elif extension == "pptx":
|
| 126 |
+
text = get_text_from_PPT(file_bytes)
|
| 127 |
elif extension == "xlsx":
|
| 128 |
+
text = get_text_from_EXCEL(file_bytes)
|
| 129 |
+
|
|
|
|
| 130 |
|
| 131 |
if not text.strip():
|
| 132 |
return JSONResponse(content={'error':'File is emplty'},status_code=400)
|
|
|
|
| 152 |
if extension not in Supported_extensions:
|
| 153 |
return JSONResponse(content={"error": "Unsupported file type"},status_code=400)
|
| 154 |
|
| 155 |
+
df = pd.read_excel(io= file.file)
|
| 156 |
|
| 157 |
message = f"""
|
| 158 |
You are a helpful assistant that helps users write Python code.
|
|
|
|
| 221 |
text += page.get_text()
|
| 222 |
return text
|
| 223 |
|
| 224 |
+
def get_text_from_PPT(file_content):
|
| 225 |
+
prs = Presentation(io.BytesIO(file_content))
|
| 226 |
text = ""
|
| 227 |
for slide in prs.slides:
|
| 228 |
for shape in slide.shapes:
|
|
|
|
| 231 |
return text
|
| 232 |
|
| 233 |
|
| 234 |
+
def get_text_from_DOC(file_content):
    """Extract the paragraph text of a .docx document held in memory.

    Args:
        file_content: Raw bytes of the .docx file.

    Returns:
        The text of each entry in ``doc.paragraphs``, one paragraph per
        line. An empty string when the document has no paragraphs.
    """
    # Document() is given a file-like object, so wrap the raw bytes.
    doc = Document(io.BytesIO(file_content))
    # Join with newlines so the last word of one paragraph does not fuse
    # with the first word of the next; str.join also avoids building the
    # result through repeated string concatenation.
    return "\n".join(paragraph.text for paragraph in doc.paragraphs)
|
| 240 |
|
| 241 |
def get_text_from_EXCEL(file):
    """Render an in-memory Excel workbook as plain text.

    Args:
        file: Raw bytes of the workbook.

    Returns:
        The string form of the DataFrame that pandas reads from the bytes.
    """
    # pd.read_excel is given a file-like object, so wrap the raw bytes.
    workbook_stream = io.BytesIO(file)
    return pd.read_excel(io=workbook_stream).to_string()
|