codeboosterstech committed on
Commit
e7a235b
·
verified ·
1 Parent(s): 9ec6879

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -25
app.py CHANGED
@@ -69,39 +69,49 @@ class SerpClient:
69
  # ---------------------------
70
  # Safe file text extraction (Gradio returns FileData dict: {"name", "size", "path"})
71
  # ---------------------------
72
- def extract_text_from_gradio_file(filedata: Optional[dict]) -> str:
 
 
 
 
 
 
 
73
  if not filedata:
74
  return ""
75
- # filedata is a dict with 'name' and 'path' in Spaces
76
- file_path = filedata.get("path") or filedata.get("tmp_path") or filedata.get("name")
77
- if not file_path:
78
- return ""
79
- file_path = str(file_path)
 
 
 
 
 
 
 
 
 
 
80
  try:
81
- lower = file_path.lower()
82
  if lower.endswith(".txt"):
83
- with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
84
- return f.read()
85
  if lower.endswith(".pdf"):
86
- try:
87
- from pypdf import PdfReader
88
- reader = PdfReader(file_path)
89
- return "\n".join([p.extract_text() or "" for p in reader.pages])
90
- except Exception:
91
- # fallback: try binary read and decode
92
- with open(file_path, "rb") as f:
93
- return f.read().decode("utf-8", errors="ignore")
94
  if lower.endswith(".docx"):
95
- try:
96
- import docx
97
- doc = docx.Document(file_path)
98
- return "\n".join([p.text for p in doc.paragraphs])
99
- except Exception:
100
- with open(file_path, "rb") as f:
101
- return f.read().decode("utf-8", errors="ignore")
102
- # Fallback: read bytes
103
  with open(file_path, "rb") as f:
104
  return f.read().decode("utf-8", errors="ignore")
 
105
  except Exception:
106
  return ""
107
 
 
69
  # ---------------------------
70
  # Safe file text extraction (Gradio passes a FileData dict {"name", "size", "path"} or a NamedString path)
71
  # ---------------------------
def _resolve_gradio_file_path(filedata) -> str:
    """Return the filesystem path carried by a Gradio upload value, or "" if none."""
    import os

    # Case A: FileData-style dict, e.g. {"name": "..", "path": "...", "size": ...}.
    if isinstance(filedata, dict):
        return str(filedata.get("path") or filedata.get("name") or "")

    # Case B: NamedString (a str subclass) or any other str / path-like object.
    # os.fsdecode keeps the full path for pathlib.Path values, whose ``.name``
    # attribute would only be the basename.
    if isinstance(filedata, (str, os.PathLike)):
        return str(os.fsdecode(filedata))

    # Case C: an object exposing its location as an attribute.
    # NOTE(review): presumably some Gradio versions hand over tempfile-like
    # wrappers whose ``.name`` is the full path -- confirm against the
    # Gradio version this app pins.
    for attr in ("path", "name"):
        candidate = getattr(filedata, attr, None)
        if isinstance(candidate, str) and candidate:
            return candidate

    # Last resort: same behaviour as before for unknown objects.
    return str(filedata)


def extract_text_from_gradio_file(filedata) -> str:
    """Extract the text content of a file uploaded through Gradio.

    Accepted ``filedata`` shapes:
      1. A dict such as ``{"name": "..", "path": "...", "size": ...}``
         (``"path"`` is preferred, ``"name"`` is the fallback).
      2. A NamedString / plain ``str`` / ``os.PathLike`` holding the path.
      3. An object exposing the path via a ``.path`` or ``.name`` attribute.

    The format is chosen from the (case-insensitive) file extension:
    ``.txt`` is read as UTF-8 text, ``.pdf`` goes through ``pypdf``,
    ``.docx`` goes through ``python-docx``; anything else is read as bytes
    and decoded as UTF-8 with undecodable bytes dropped.

    Returns:
        The extracted text, or ``""`` when ``filedata`` is empty, carries no
        path, or extraction fails for any reason (best-effort: this function
        never raises to the caller).
    """
    if not filedata:
        return ""

    file_path = _resolve_gradio_file_path(filedata)
    if not file_path:
        return ""

    lower = file_path.lower()

    try:
        if lower.endswith(".txt"):
            # Context manager so the handle is closed even if read() fails.
            with open(file_path, "r", encoding="utf-8", errors="ignore") as handle:
                return handle.read()

        if lower.endswith(".pdf"):
            # Imported lazily so the dependency is only needed for PDF uploads.
            from pypdf import PdfReader

            reader = PdfReader(file_path)
            # extract_text() may yield None for a page; treat that as empty.
            return "\n".join([p.extract_text() or "" for p in reader.pages])

        if lower.endswith(".docx"):
            # Imported lazily so the dependency is only needed for DOCX uploads.
            import docx

            doc = docx.Document(file_path)
            return "\n".join([p.text for p in doc.paragraphs])

        # Fallback: unknown extension, decode the raw bytes leniently.
        with open(file_path, "rb") as handle:
            return handle.read().decode("utf-8", errors="ignore")

    except Exception:
        # Deliberate best-effort boundary: keep returning "" so the UI does not
        # crash, but leave a trace instead of swallowing the failure silently.
        import logging

        logging.getLogger(__name__).warning(
            "Could not extract text from %r", file_path, exc_info=True
        )
        return ""
117