BabakScrapes committed on
Commit
f042f29
·
verified ·
1 Parent(s): 64d7a6d

Upgraded used packages

Browse files
Files changed (1) hide show
  1. app.py +195 -126
app.py CHANGED
@@ -1,143 +1,212 @@
1
- import os
2
- os.system("python -m pip install transformers==4.26.1")
3
- os.system("python -m pip install spacy==3.5.4")
4
- os.system("python -m pip install spacy-alignments==0.9.1")
5
- os.system("python -m pip install spacy-legacy==3.0.12")
6
- os.system("python -m pip install spacy-loggers==1.0.3")
7
- os.system("python -m pip install torch")
8
- os.system("python -m pip install seaborn==0.11.2")
9
- os.system("python -m pip install gradio==3.16.1")
10
- os.system("python -m pip install typer==0.4.1")
11
- os.system("python -m pip install pydantic==1.9.2")
12
- os.system("python -m pip install matplotlib==3.4.3")
13
- os.system("python -m pip install Flask")
14
- os.system("python -m pip install sty==1.0.4")
15
- os.system("python -m pip install numpy==1.26.4")
16
- from flask import Flask, render_template, request, send_file, after_this_request
17
- import hashlib
18
- from pipeline import *
19
  import csv
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
- app = Flask(__name__, template_folder='templates', static_folder='templates')
22
-
23
- # Set the folder for saving uploaded files
24
- UPLOAD_FOLDER = 'uploads/'
25
- app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
26
- if not os.path.isdir(app.config['UPLOAD_FOLDER']):
27
- # Create the directory if it doesn't exist
28
- os.makedirs(app.config['UPLOAD_FOLDER'])
29
-
30
- # Allowable file extensions for uploading
31
- ALLOWED_EXTENSIONS = {'txt','csv'}
32
-
33
- # Check if a file has an allowable extension
34
- def allowed_file(filename):
35
- return '.' in filename and \
36
- filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
37
-
38
- # Reverse the lines in the file and return a list of dictionaries containing the original input and the processed output
39
- def process_file(file_path):
40
- file_handle = open(file_path,'r',encoding='utf-8-sig',errors='ignore')
41
-
42
- with open(file_path, 'r', encoding='utf-8-sig',errors='ignore') as f:
43
- if file_path.endswith(".txt"):
44
- lines = f.readlines()
45
- results = []
46
- for line in lines:
47
- if line.strip() != "":
48
- result = run_pipeline(line.strip())
49
- results.append(result)
50
- elif file_path.endswith(".csv"):
51
  reader = csv.reader(f)
52
- for line in reader:
53
- if line[0].strip() != "":
54
- result = run_pipeline(line[0].strip())
55
- results.append(result)
56
- result = [{'input': line.strip(), 'output': result} for line, result in zip(lines, results)]
57
- @after_this_request
58
- def remove_file(response):
59
- try:
60
- os.remove(file_path)
61
- file_handle.close()
62
- except Exception as error:
63
- app.logger.error("Error removing or closing downloaded file handle", error)
64
- return response
65
- return result
66
 
67
- # Home page route that allows users to upload files
68
- @app.route('/')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  def index():
70
- # Otherwise, render the index page
71
- return render_template('index.html')
72
 
73
- # Handle file upload requests
74
- @app.route('/', methods=['POST'])
75
  def upload_file():
76
- # Get the uploaded file
77
- file = request.files['file']
78
 
79
- # If the user did not select any file, return an error message
80
- if not file:
81
- return 'No file selected'
82
 
83
- # If the file type is not allowed, return an error message
84
  if not allowed_file(file.filename):
85
- return 'File type not allowed'
86
 
87
- # Generate a unique hash code for the file name
88
- hash_code = hashlib.md5(file.read()).hexdigest()
89
- # if ".txt" in filename:
90
- filename = f"{hash_code}.txt"
91
- # elif ".csv" in filename:
92
- # filename = f"{hash_code}.csv"
93
- file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
94
 
95
- # Save the uploaded file
96
  file.seek(0)
97
- file.save(file_path)
98
 
99
- # Process the uploaded file and return the result as a JSON line file
100
- result = process_file(file_path)
101
- result_file_path = os.path.join(app.config['UPLOAD_FOLDER'], 'result.csv')
102
 
103
- counts = {}
104
- individual_labels = {}
105
- props = {}
106
- for id_,text in enumerate(result):
107
- individual_labels[id_] = []
108
- counts[id_] = {"generic":0,"specific":0,"stative":0,"dynamic":0,"static":0,"episodic":0,"habitual":0,"NA genericity":0,"NA eventivity":0,"NA boundedness":0}
109
- for clause in text['output'][1]:
110
- individual_labels[id_].append(clause[1])
111
- label = labels2attrs[clause[1]]
112
- for id__,feature in enumerate(label):
113
- if "NA" not in feature:
114
- counts[id_][feature] += 1
115
- elif id__ == 0:
116
- counts[id_]["NA genericity"] += 1
117
- elif id__ == 1:
118
- counts[id_]["NA eventivity"] += 1
119
- else:
120
- counts[id_]["NA boundedness"] += 1
121
- props[id_] = [counts[id_]['generic']/(counts[id_]['generic']+counts[id_]['specific']+counts[id_]['NA genericity']),counts[id_]['specific']/(counts[id_]['generic']+counts[id_]['specific']+counts[id_]['NA genericity']),
122
- counts[id_]['stative']/(counts[id_]['stative']+counts[id_]['dynamic']+counts[id_]['NA eventivity']), counts[id_]['dynamic']/(counts[id_]['stative']+counts[id_]['dynamic']+counts[id_]['NA eventivity']),
123
- counts[id_]['static']/(counts[id_]['static']+counts[id_]['episodic']+counts[id_]["habitual"]+counts[id_]['NA boundedness']),counts[id_]['episodic']/(counts[id_]['static']+counts[id_]['episodic']+counts[id_]["habitual"]+counts[id_]["NA boundedness"]),
124
- counts[id_]['habitual']/(counts[id_]['static']+counts[id_]['episodic']+counts[id_]["habitual"]+counts[id_]["NA boundedness"])]
125
 
 
126
 
127
- with open(result_file_path, 'w', encoding='utf-8', errors='ignore', newline="") as f:
128
- writer = csv.writer(f)
129
- writer.writerow(["input","clauses","individual labels","genericity: generic count","genericity: specific count","eventivity: stative count","eventivity: dynamic count","boundedness: static count","boundedness: episodic count","habitual count","genericity: proportion generic","genericity: proportion specific","eventivity: proportion stative","eventivity: proportion dynamic","boundedness: proportion static","boundedness: proportion episodic","proportion habitual"])
130
- for id_ in counts.keys():
131
- clauses = []
132
- for clause in result[id_]["output"][0]:
133
- clauses.append("{}: {}".format(clause[1],clause[0]))
134
- clauses = "\n".join(clauses)
135
- ind_labels = "\n".join(individual_labels[id_])
136
- extracted = [result[id_]["input"],clauses,ind_labels,counts[id_]['generic'],counts[id_]['specific'],counts[id_]['stative'],counts[id_]['dynamic'],counts[id_]['static'],counts[id_]['episodic'],counts[id_]['habitual']]+props[id_]
137
- writer.writerow(extracted)
138
-
139
- # Return the result file as a download once the processing is complete
140
- return send_file(result_file_path, as_attachment=True)
141
-
142
- if __name__ == '__main__':
143
- app.run(host="0.0.0.0", port=7860)
 
1
+ from __future__ import annotations
2
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  import csv
4
+ import hashlib
5
+ import os
6
+ import tempfile
7
+ from pathlib import Path
8
+
9
+ from flask import Flask, after_this_request, render_template, request, send_file
10
+ from werkzeug.utils import secure_filename
11
+
12
+ from pipeline import labels2attrs, run_pipeline
13
+
14
+ app = Flask(__name__, template_folder="templates", static_folder="templates")
15
+
16
+ UPLOAD_DIR = Path("uploads")
17
+ UPLOAD_DIR.mkdir(exist_ok=True)
18
 
19
+ ALLOWED_EXTENSIONS = {"txt", "csv"}
20
+
21
+
22
+ def allowed_file(filename: str) -> bool:
23
+ return "." in filename and filename.rsplit(".", 1)[1].lower() in ALLOWED_EXTENSIONS
24
+
25
+
26
+ def iter_texts_from_file(file_path: Path):
27
+ suffix = file_path.suffix.lower()
28
+
29
+ if suffix == ".txt":
30
+ with file_path.open("r", encoding="utf-8-sig", errors="ignore") as f:
31
+ for line in f:
32
+ text = line.strip()
33
+ if text:
34
+ yield text
35
+
36
+ elif suffix == ".csv":
37
+ with file_path.open("r", encoding="utf-8-sig", errors="ignore", newline="") as f:
 
 
 
 
 
 
 
 
 
 
 
38
  reader = csv.reader(f)
39
+ for row in reader:
40
+ if row and row[0].strip():
41
+ yield row[0].strip()
 
 
 
 
 
 
 
 
 
 
 
42
 
43
+
44
+ def process_file(file_path: Path):
45
+ results = []
46
+ for text in iter_texts_from_file(file_path):
47
+ results.append({"input": text, "output": run_pipeline(text)})
48
+ return results
49
+
50
+
51
+ def summarize_result(result_rows):
52
+ counts = {}
53
+ individual_labels = {}
54
+ proportions = {}
55
+
56
+ for idx, item in enumerate(result_rows):
57
+ individual_labels[idx] = []
58
+ counts[idx] = {
59
+ "generic": 0,
60
+ "specific": 0,
61
+ "stative": 0,
62
+ "dynamic": 0,
63
+ "static": 0,
64
+ "episodic": 0,
65
+ "habitual": 0,
66
+ "NA genericity": 0,
67
+ "NA eventivity": 0,
68
+ "NA boundedness": 0,
69
+ }
70
+
71
+ for clause_text, label_name in item["output"][1]:
72
+ individual_labels[idx].append(label_name)
73
+ attrs = labels2attrs[label_name]
74
+
75
+ for attr_index, feature in enumerate(attrs):
76
+ if "NA" not in feature:
77
+ counts[idx][feature] += 1
78
+ elif attr_index == 0:
79
+ counts[idx]["NA genericity"] += 1
80
+ elif attr_index == 1:
81
+ counts[idx]["NA eventivity"] += 1
82
+ else:
83
+ counts[idx]["NA boundedness"] += 1
84
+
85
+ gen_total = (
86
+ counts[idx]["generic"]
87
+ + counts[idx]["specific"]
88
+ + counts[idx]["NA genericity"]
89
+ )
90
+ evt_total = (
91
+ counts[idx]["stative"]
92
+ + counts[idx]["dynamic"]
93
+ + counts[idx]["NA eventivity"]
94
+ )
95
+ bnd_total = (
96
+ counts[idx]["static"]
97
+ + counts[idx]["episodic"]
98
+ + counts[idx]["habitual"]
99
+ + counts[idx]["NA boundedness"]
100
+ )
101
+
102
+ proportions[idx] = [
103
+ counts[idx]["generic"] / gen_total if gen_total else 0.0,
104
+ counts[idx]["specific"] / gen_total if gen_total else 0.0,
105
+ counts[idx]["stative"] / evt_total if evt_total else 0.0,
106
+ counts[idx]["dynamic"] / evt_total if evt_total else 0.0,
107
+ counts[idx]["static"] / bnd_total if bnd_total else 0.0,
108
+ counts[idx]["episodic"] / bnd_total if bnd_total else 0.0,
109
+ counts[idx]["habitual"] / bnd_total if bnd_total else 0.0,
110
+ ]
111
+
112
+ return counts, individual_labels, proportions
113
+
114
+
115
+ def write_results_csv(result_rows) -> str:
116
+ counts, individual_labels, proportions = summarize_result(result_rows)
117
+
118
+ tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv")
119
+ tmp_path = tmp.name
120
+ tmp.close()
121
+
122
+ with open(tmp_path, "w", encoding="utf-8", newline="") as f:
123
+ writer = csv.writer(f)
124
+ writer.writerow([
125
+ "input",
126
+ "clauses",
127
+ "individual labels",
128
+ "genericity: generic count",
129
+ "genericity: specific count",
130
+ "eventivity: stative count",
131
+ "eventivity: dynamic count",
132
+ "boundedness: static count",
133
+ "boundedness: episodic count",
134
+ "habitual count",
135
+ "genericity: proportion generic",
136
+ "genericity: proportion specific",
137
+ "eventivity: proportion stative",
138
+ "eventivity: proportion dynamic",
139
+ "boundedness: proportion static",
140
+ "boundedness: proportion episodic",
141
+ "proportion habitual",
142
+ ])
143
+
144
+ for idx, item in enumerate(result_rows):
145
+ clauses = "\n".join(
146
+ f"{clause_id}: {clause_text}"
147
+ for clause_text, clause_id in item["output"][0]
148
+ )
149
+ label_lines = "\n".join(individual_labels[idx])
150
+
151
+ row = [
152
+ item["input"],
153
+ clauses,
154
+ label_lines,
155
+ counts[idx]["generic"],
156
+ counts[idx]["specific"],
157
+ counts[idx]["stative"],
158
+ counts[idx]["dynamic"],
159
+ counts[idx]["static"],
160
+ counts[idx]["episodic"],
161
+ counts[idx]["habitual"],
162
+ *proportions[idx],
163
+ ]
164
+ writer.writerow(row)
165
+
166
+ return tmp_path
167
+
168
+
169
+ @app.route("/", methods=["GET"])
170
  def index():
171
+ return render_template("index.html")
172
+
173
 
174
+ @app.route("/", methods=["POST"])
 
175
  def upload_file():
176
+ if "file" not in request.files:
177
+ return "No file selected", 400
178
 
179
+ file = request.files["file"]
180
+ if not file or file.filename == "":
181
+ return "No file selected", 400
182
 
 
183
  if not allowed_file(file.filename):
184
+ return "File type not allowed", 400
185
 
186
+ original_name = secure_filename(file.filename)
187
+ suffix = Path(original_name).suffix.lower()
188
+ file_hash = hashlib.md5(file.read()).hexdigest()
189
+ saved_path = UPLOAD_DIR / f"{file_hash}{suffix}"
 
 
 
190
 
 
191
  file.seek(0)
192
+ file.save(saved_path)
193
 
194
+ result_rows = process_file(saved_path)
195
+ output_csv = write_results_csv(result_rows)
 
196
 
197
+ @after_this_request
198
+ def cleanup(response):
199
+ try:
200
+ if saved_path.exists():
201
+ saved_path.unlink()
202
+ if os.path.exists(output_csv):
203
+ os.remove(output_csv)
204
+ except Exception:
205
+ app.logger.exception("Failed to clean up temp files")
206
+ return response
 
 
 
 
 
 
 
 
 
 
 
 
207
 
208
+ return send_file(output_csv, as_attachment=True, download_name="result.csv")
209
 
210
+
211
+ if __name__ == "__main__":
212
+ app.run(host="0.0.0.0", port=7860, debug=False)