Anecedotal_Discourse_Classifier_Multitext

Running

App Files Community

BabakScrapes committed on Mar 13

Commit

f042f29

verified ·

1 Parent(s): 64d7a6d

Upgraded used packages

Browse files

Files changed (1) hide show

app.py +195 -126

app.py CHANGED Viewed

@@ -1,143 +1,212 @@
-import os
-os.system("python -m pip install transformers==4.26.1")
-os.system("python -m pip install spacy==3.5.4")
-os.system("python -m pip install spacy-alignments==0.9.1")
-os.system("python -m pip install spacy-legacy==3.0.12")
-os.system("python -m pip install spacy-loggers==1.0.3")
-os.system("python -m pip install torch")
-os.system("python -m pip install seaborn==0.11.2")
-os.system("python -m pip install gradio==3.16.1")
-os.system("python -m pip install typer==0.4.1")
-os.system("python -m pip install pydantic==1.9.2")
-os.system("python -m pip install matplotlib==3.4.3")
-os.system("python -m pip install Flask")
-os.system("python -m pip install sty==1.0.4")
-os.system("python -m pip install numpy==1.26.4")
-from flask import Flask, render_template, request, send_file, after_this_request
-import hashlib
-from pipeline import *
 import csv
-app = Flask(__name__, template_folder='templates', static_folder='templates')
-# Set the folder for saving uploaded files
-UPLOAD_FOLDER = 'uploads/'
-app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
-if not os.path.isdir(app.config['UPLOAD_FOLDER']):
-    # Create the directory if it doesn't exist
-    os.makedirs(app.config['UPLOAD_FOLDER'])
-# Allowable file extensions for uploading
-ALLOWED_EXTENSIONS = {'txt','csv'}
-# Check if a file has an allowable extension
-def allowed_file(filename):
-    return '.' in filename and \
-           filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
-# Reverse the lines in the file and return a list of dictionaries containing the original input and the processed output
-def process_file(file_path):
-    file_handle = open(file_path,'r',encoding='utf-8-sig',errors='ignore')
-    with open(file_path, 'r', encoding='utf-8-sig',errors='ignore') as f:
-        if file_path.endswith(".txt"):
-            lines = f.readlines()
-            results = []
-            for line in lines:
-                if line.strip() != "":
-                    result = run_pipeline(line.strip())
-                    results.append(result)
-        elif file_path.endswith(".csv"):
             reader = csv.reader(f)
-            for line in reader:
-                if line[0].strip() != "":
-                    result = run_pipeline(line[0].strip())
-                    results.append(result)
-    result = [{'input': line.strip(), 'output': result} for line, result in zip(lines, results)]
-    @after_this_request
-    def remove_file(response):
-        try:
-            os.remove(file_path)
-            file_handle.close()
-        except Exception as error:
-            app.logger.error("Error removing or closing downloaded file handle", error)
-        return response
-    return result
-# Home page route that allows users to upload files
-@app.route('/')
 def index():
-    # Otherwise, render the index page
-    return render_template('index.html')
-# Handle file upload requests
-@app.route('/', methods=['POST'])
 def upload_file():
-    # Get the uploaded file
-    file = request.files['file']
-    # If the user did not select any file, return an error message
-    if not file:
-        return 'No file selected'
-    # If the file type is not allowed, return an error message
     if not allowed_file(file.filename):
-        return 'File type not allowed'
-    # Generate a unique hash code for the file name
-    hash_code = hashlib.md5(file.read()).hexdigest()
-    # if ".txt" in filename:
-    filename = f"{hash_code}.txt"
-    # elif ".csv" in filename:
-    #     filename = f"{hash_code}.csv"
-    file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
-    # Save the uploaded file
     file.seek(0)
-    file.save(file_path)
-    # Process the uploaded file and return the result as a JSON line file
-    result = process_file(file_path)
-    result_file_path = os.path.join(app.config['UPLOAD_FOLDER'], 'result.csv')
-    counts = {}
-    individual_labels = {}
-    props = {}
-    for id_,text in enumerate(result):
-        individual_labels[id_] = []
-        counts[id_] = {"generic":0,"specific":0,"stative":0,"dynamic":0,"static":0,"episodic":0,"habitual":0,"NA genericity":0,"NA eventivity":0,"NA boundedness":0}
-        for clause in text['output'][1]:
-            individual_labels[id_].append(clause[1])
-            label = labels2attrs[clause[1]]
-            for id__,feature in enumerate(label):
-                if "NA" not in feature:
-                    counts[id_][feature] += 1
-                elif id__ == 0:
-                    counts[id_]["NA genericity"] += 1
-                elif id__ == 1:
-                    counts[id_]["NA eventivity"] += 1
-                else:
-                    counts[id_]["NA boundedness"] += 1
-            props[id_] = [counts[id_]['generic']/(counts[id_]['generic']+counts[id_]['specific']+counts[id_]['NA genericity']),counts[id_]['specific']/(counts[id_]['generic']+counts[id_]['specific']+counts[id_]['NA genericity']),
-                counts[id_]['stative']/(counts[id_]['stative']+counts[id_]['dynamic']+counts[id_]['NA eventivity']), counts[id_]['dynamic']/(counts[id_]['stative']+counts[id_]['dynamic']+counts[id_]['NA eventivity']),
-                counts[id_]['static']/(counts[id_]['static']+counts[id_]['episodic']+counts[id_]["habitual"]+counts[id_]['NA boundedness']),counts[id_]['episodic']/(counts[id_]['static']+counts[id_]['episodic']+counts[id_]["habitual"]+counts[id_]["NA boundedness"]),
-                counts[id_]['habitual']/(counts[id_]['static']+counts[id_]['episodic']+counts[id_]["habitual"]+counts[id_]["NA boundedness"])]
-    with open(result_file_path, 'w', encoding='utf-8', errors='ignore', newline="") as f:
-        writer = csv.writer(f)
-        writer.writerow(["input","clauses","individual labels","genericity: generic count","genericity: specific count","eventivity: stative count","eventivity: dynamic count","boundedness: static count","boundedness: episodic count","habitual count","genericity:  proportion generic","genericity: proportion specific","eventivity: proportion stative","eventivity: proportion dynamic","boundedness: proportion static","boundedness: proportion episodic","proportion habitual"])
-        for id_ in counts.keys():
-            clauses = []
-            for clause in result[id_]["output"][0]:
-                clauses.append("{}: {}".format(clause[1],clause[0]))
-            clauses = "\n".join(clauses)
-            ind_labels = "\n".join(individual_labels[id_])
-            extracted = [result[id_]["input"],clauses,ind_labels,counts[id_]['generic'],counts[id_]['specific'],counts[id_]['stative'],counts[id_]['dynamic'],counts[id_]['static'],counts[id_]['episodic'],counts[id_]['habitual']]+props[id_]
-            writer.writerow(extracted)
-    # Return the result file as a download once the processing is complete
-    return send_file(result_file_path, as_attachment=True)
-if __name__ == '__main__':
-    app.run(host="0.0.0.0", port=7860)

+from __future__ import annotations
 import csv
+import hashlib
+import os
+import tempfile
+from pathlib import Path
+from flask import Flask, after_this_request, render_template, request, send_file
+from werkzeug.utils import secure_filename
+from pipeline import labels2attrs, run_pipeline
+app = Flask(__name__, template_folder="templates", static_folder="templates")
+UPLOAD_DIR = Path("uploads")
+UPLOAD_DIR.mkdir(exist_ok=True)
+ALLOWED_EXTENSIONS = {"txt", "csv"}
+def allowed_file(filename: str) -> bool:
+    return "." in filename and filename.rsplit(".", 1)[1].lower() in ALLOWED_EXTENSIONS
+def iter_texts_from_file(file_path: Path):
+    suffix = file_path.suffix.lower()
+    if suffix == ".txt":
+        with file_path.open("r", encoding="utf-8-sig", errors="ignore") as f:
+            for line in f:
+                text = line.strip()
+                if text:
+                    yield text
+    elif suffix == ".csv":
+        with file_path.open("r", encoding="utf-8-sig", errors="ignore", newline="") as f:
             reader = csv.reader(f)
+            for row in reader:
+                if row and row[0].strip():
+                    yield row[0].strip()
+def process_file(file_path: Path):
+    results = []
+    for text in iter_texts_from_file(file_path):
+        results.append({"input": text, "output": run_pipeline(text)})
+    return results
+def summarize_result(result_rows):
+    counts = {}
+    individual_labels = {}
+    proportions = {}
+    for idx, item in enumerate(result_rows):
+        individual_labels[idx] = []
+        counts[idx] = {
+            "generic": 0,
+            "specific": 0,
+            "stative": 0,
+            "dynamic": 0,
+            "static": 0,
+            "episodic": 0,
+            "habitual": 0,
+            "NA genericity": 0,
+            "NA eventivity": 0,
+            "NA boundedness": 0,
+        }
+        for clause_text, label_name in item["output"][1]:
+            individual_labels[idx].append(label_name)
+            attrs = labels2attrs[label_name]
+            for attr_index, feature in enumerate(attrs):
+                if "NA" not in feature:
+                    counts[idx][feature] += 1
+                elif attr_index == 0:
+                    counts[idx]["NA genericity"] += 1
+                elif attr_index == 1:
+                    counts[idx]["NA eventivity"] += 1
+                else:
+                    counts[idx]["NA boundedness"] += 1
+        gen_total = (
+            counts[idx]["generic"]
+            + counts[idx]["specific"]
+            + counts[idx]["NA genericity"]
+        )
+        evt_total = (
+            counts[idx]["stative"]
+            + counts[idx]["dynamic"]
+            + counts[idx]["NA eventivity"]
+        )
+        bnd_total = (
+            counts[idx]["static"]
+            + counts[idx]["episodic"]
+            + counts[idx]["habitual"]
+            + counts[idx]["NA boundedness"]
+        )
+        proportions[idx] = [
+            counts[idx]["generic"] / gen_total if gen_total else 0.0,
+            counts[idx]["specific"] / gen_total if gen_total else 0.0,
+            counts[idx]["stative"] / evt_total if evt_total else 0.0,
+            counts[idx]["dynamic"] / evt_total if evt_total else 0.0,
+            counts[idx]["static"] / bnd_total if bnd_total else 0.0,
+            counts[idx]["episodic"] / bnd_total if bnd_total else 0.0,
+            counts[idx]["habitual"] / bnd_total if bnd_total else 0.0,
+        ]
+    return counts, individual_labels, proportions
+def write_results_csv(result_rows) -> str:
+    counts, individual_labels, proportions = summarize_result(result_rows)
+    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv")
+    tmp_path = tmp.name
+    tmp.close()
+    with open(tmp_path, "w", encoding="utf-8", newline="") as f:
+        writer = csv.writer(f)
+        writer.writerow([
+            "input",
+            "clauses",
+            "individual labels",
+            "genericity: generic count",
+            "genericity: specific count",
+            "eventivity: stative count",
+            "eventivity: dynamic count",
+            "boundedness: static count",
+            "boundedness: episodic count",
+            "habitual count",
+            "genericity: proportion generic",
+            "genericity: proportion specific",
+            "eventivity: proportion stative",
+            "eventivity: proportion dynamic",
+            "boundedness: proportion static",
+            "boundedness: proportion episodic",
+            "proportion habitual",
+        ])
+        for idx, item in enumerate(result_rows):
+            clauses = "\n".join(
+                f"{clause_id}: {clause_text}"
+                for clause_text, clause_id in item["output"][0]
+            )
+            label_lines = "\n".join(individual_labels[idx])
+            row = [
+                item["input"],
+                clauses,
+                label_lines,
+                counts[idx]["generic"],
+                counts[idx]["specific"],
+                counts[idx]["stative"],
+                counts[idx]["dynamic"],
+                counts[idx]["static"],
+                counts[idx]["episodic"],
+                counts[idx]["habitual"],
+                *proportions[idx],
+            ]
+            writer.writerow(row)
+    return tmp_path
+@app.route("/", methods=["GET"])
 def index():
+    return render_template("index.html")
+@app.route("/", methods=["POST"])
 def upload_file():
+    if "file" not in request.files:
+        return "No file selected", 400
+    file = request.files["file"]
+    if not file or file.filename == "":
+        return "No file selected", 400
     if not allowed_file(file.filename):
+        return "File type not allowed", 400
+    original_name = secure_filename(file.filename)
+    suffix = Path(original_name).suffix.lower()
+    file_hash = hashlib.md5(file.read()).hexdigest()
+    saved_path = UPLOAD_DIR / f"{file_hash}{suffix}"
     file.seek(0)
+    file.save(saved_path)
+    result_rows = process_file(saved_path)
+    output_csv = write_results_csv(result_rows)
+    @after_this_request
+    def cleanup(response):
+        try:
+            if saved_path.exists():
+                saved_path.unlink()
+            if os.path.exists(output_csv):
+                os.remove(output_csv)
+        except Exception:
+            app.logger.exception("Failed to clean up temp files")
+        return response
+    return send_file(output_csv, as_attachment=True, download_name="result.csv")
+if __name__ == "__main__":
+    app.run(host="0.0.0.0", port=7860, debug=False)