csAhmad committed on
Commit
4eb565b
·
verified ·
1 Parent(s): 3bea8a5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +65 -23
app.py CHANGED
@@ -4,38 +4,77 @@ import os
4
  import pandas as pd
5
  from sentence_transformers import SentenceTransformer
6
 
 
7
  model = SentenceTransformer("csAhmad/zoraiz-model")
8
 
9
- os.makedirs("temp/extracted", exist_ok=True)
 
 
10
 
11
- def process_zip(zip_file):
12
- extract_path = "temp/extracted"
13
- os.makedirs(extract_path, exist_ok=True)
14
 
15
- zip_path = zip_file.name # always works in Spaces
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
- # Extract ZIP
18
- with zipfile.ZipFile(zip_path, 'r') as zip_ref:
19
- zip_ref.extractall(extract_path)
20
 
21
- results = []
 
 
 
22
 
23
- # Example: process txt files
24
- for file in os.listdir(extract_path):
25
- if file.endswith(".txt"):
26
- file_path = os.path.join(extract_path, file)
27
 
28
- with open(file_path, "r", encoding="utf-8") as f:
29
- text = f.read()
30
 
31
- emb = model.encode(text)
 
 
 
 
 
32
 
33
- results.append({
34
- "file": file,
35
- "embedding_dim": len(emb),
36
- "first_10_values": str(emb[:10])
37
- })
38
 
 
 
 
39
  df = pd.DataFrame(results)
40
 
41
  output_file = "output.xlsx"
@@ -44,12 +83,15 @@ def process_zip(zip_file):
44
  return output_file
45
 
46
 
 
 
 
47
  demo = gr.Interface(
48
  fn=process_zip,
49
  inputs=gr.File(file_types=[".zip"]),
50
  outputs=gr.File(label="Download Excel"),
51
- title="ZIP to Excel Processor",
52
- description="Upload ZIP Extract files Generate embeddings Export Excel"
53
  )
54
 
55
  demo.launch()
 
4
  import pandas as pd
5
  from sentence_transformers import SentenceTransformer
6
 
7
+ # Load model from HF
8
  model = SentenceTransformer("csAhmad/zoraiz-model")
9
 
10
+ # Ensure temp directory exists
11
+ EXTRACT_PATH = "temp/extracted"
12
+ os.makedirs(EXTRACT_PATH, exist_ok=True)
13
 
 
 
 
14
 
15
+ def process_zip(zip_file):
16
+ # ---------------------------
17
+ # 1. Validate input
18
+ # ---------------------------
19
+ if zip_file is None:
20
+ raise gr.Error("Please upload a ZIP file.")
21
+
22
+ # reset extraction folder
23
+ if os.path.exists(EXTRACT_PATH):
24
+ for f in os.listdir(EXTRACT_PATH):
25
+ try:
26
+ os.remove(os.path.join(EXTRACT_PATH, f))
27
+ except:
28
+ pass
29
+
30
+ os.makedirs(EXTRACT_PATH, exist_ok=True)
31
+
32
+ zip_path = zip_file.name
33
+
34
+ # ---------------------------
35
+ # 2. Extract ZIP safely
36
+ # ---------------------------
37
+ try:
38
+ with zipfile.ZipFile(zip_path, "r") as zip_ref:
39
+ zip_ref.extractall(EXTRACT_PATH)
40
+ except zipfile.BadZipFile:
41
+ raise gr.Error("Invalid ZIP file uploaded.")
42
+
43
+ # ---------------------------
44
+ # 3. Process files
45
+ # ---------------------------
46
+ results = []
47
 
48
+ for root, dirs, files in os.walk(EXTRACT_PATH):
49
+ for file in files:
50
+ file_path = os.path.join(root, file)
51
 
52
+ # only process text files
53
+ if file.lower().endswith(".txt"):
54
+ with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
55
+ text = f.read()
56
 
57
+ if not text.strip():
58
+ continue
 
 
59
 
60
+ emb = model.encode(text)
 
61
 
62
+ results.append({
63
+ "file": file,
64
+ "text_length": len(text),
65
+ "embedding_dim": len(emb),
66
+ "preview_embedding": str(emb[:10])
67
+ })
68
 
69
+ # ---------------------------
70
+ # 4. Handle empty results
71
+ # ---------------------------
72
+ if len(results) == 0:
73
+ raise gr.Error("No valid .txt files found inside ZIP.")
74
 
75
+ # ---------------------------
76
+ # 5. Create Excel file
77
+ # ---------------------------
78
  df = pd.DataFrame(results)
79
 
80
  output_file = "output.xlsx"
 
83
  return output_file
84
 
85
 
86
+ # ---------------------------
87
+ # 6. Gradio UI
88
+ # ---------------------------
89
  demo = gr.Interface(
90
  fn=process_zip,
91
  inputs=gr.File(file_types=[".zip"]),
92
  outputs=gr.File(label="Download Excel"),
93
+ title="ZIP Embeddings → Excel Generator",
94
+ description="Upload a ZIP containing .txt files. It will generate embeddings using SentenceTransformer and export to Excel."
95
  )
96
 
97
  demo.launch()