Spaces:
Running
Running
| import re | |
| from io import BytesIO | |
| from openpyxl import Workbook | |
| from openpyxl.styles import Font, NamedStyle, PatternFill | |
| from openpyxl.styles.differential import DifferentialStyle | |
| import logging | |
| from logging.handlers import RotatingFileHandler | |
| import os | |
| import configparser | |
def setup_logging():
    """
    Configure application logging and return this module's logger.

    Records are sent to two destinations: a rotating file at
    ``logs/app.log`` (rolled over at 1 MiB, keeping 5 backups) and a
    stream handler. Both use the same
    ``time - logger name - level - message`` layout, and the root logger
    level is set to INFO.

    Returns
    ----------------
    The ``logging.Logger`` named after this module (``__name__``).
    """
    record_layout = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'

    # The log directory has to exist before the file handler opens its file.
    target_dir = 'logs'
    os.makedirs(target_dir, exist_ok=True)

    rotating = RotatingFileHandler(
        os.path.join(target_dir, 'app.log'),
        maxBytes=1024 * 1024,
        backupCount=5,
    )
    rotating.setFormatter(logging.Formatter(record_layout))
    console = logging.StreamHandler()

    # basicConfig applies `format` only to handlers that have no formatter
    # yet, i.e. the console handler here.
    logging.basicConfig(
        level=logging.INFO,
        format=record_layout,
        handlers=[rotating, console],
    )

    return logging.getLogger(__name__)
def getconfig(configfile_path: str):
    """
    Read the config file

    Params
    ----------------
    configfile_path: file path of .cfg file

    Returns
    ----------------
    The populated ``configparser.ConfigParser``, or ``None`` when the file
    cannot be opened or parsed. A warning is logged in either failure case.
    """
    config = configparser.ConfigParser()
    try:
        # The context manager guarantees the file handle is closed even if
        # parsing fails part-way through.
        with open(configfile_path) as config_file:
            config.read_file(config_file)
    except OSError:
        # Missing file, directory instead of file, permission problems, ...
        logging.warning("config file not found")
        return None
    except (configparser.Error, UnicodeDecodeError) as err:
        # The file exists but is not valid INI content; keep the
        # best-effort contract (return None) but say what really happened.
        logging.warning("config file could not be parsed: %s", err)
        return None
    return config
def create_excel():
    """
    Create the upload template workbook and return it as raw bytes.

    The workbook contains one sheet titled "template" whose first row holds
    the expected column headers, shown in bold on a 'bad8e1' solid fill.

    Returns
    ----------------
    The bytes written by ``Workbook.save`` into an in-memory buffer.
    """
    columns = ['id',
               'organization',
               'scope',
               'technology',
               'financial',
               'barrier',
               'technology_rationale',
               'project_rationale',
               'project_objectives',
               'maf_funding_requested',
               'contributions_public_sector',
               'contributions_private_sector',
               'contributions_other',
               'mitigation_potential']

    wb = Workbook()
    sheet = wb.active
    sheet.title = "template"
    sheet.append(columns)  # Appending columns to the first row

    # Build the style objects once and reuse them for every header cell.
    header_fill = PatternFill('solid', fgColor='bad8e1')
    header_font = Font(bold=True)

    # Style the header row itself instead of slicing a hard-coded 'A1:N4'
    # range, so the formatting always follows the `columns` list.
    for cell in sheet[1]:
        cell.fill = header_fill
        cell.font = header_font

    # Save to a BytesIO object so no file is written to disk.
    output = BytesIO()
    wb.save(output)
    return output.getvalue()
def clean_text(input_text):
    """
    Normalise free text by applying a fixed sequence of regex substitutions.

    Steps, applied in order:
      1. drop every character that is not an ASCII letter, digit,
         whitespace, or one of ``. , : ; ! ? ( ) -``;
      2. drop the literal ``x000D`` (presumably what remains of an
         ``_x000D_`` escape once step 1 has removed the underscores);
      3. collapse every run of whitespace, newlines included, to one space;
      4. collapse runs of newlines to a single newline. After step 3 no
         newline is left, so this step does not change the text.

    Params
    ----------------
    input_text: the string to clean

    Returns
    ----------------
    The cleaned string.
    """
    substitutions = (
        (r"[^a-zA-Z0-9\s.,:;!?()\-\n]", ""),
        (r"x000D", ""),
        (r"\s+", " "),
        (r"\n+", "\n"),
    )
    result = input_text
    for pattern, replacement in substitutions:
        result = re.sub(pattern, replacement, result)
    return result
def extract_predicted_labels(output, ordinal_selection=1, threshold=0.5):
    """
    Pick the two best-scoring labels from a list of classifier results.

    Params
    ----------------
    output: list of dicts, each expected to carry a 'label' and a 'score'
        entry; a missing 'score' is treated as 0
    ordinal_selection: currently unused; kept so existing callers that pass
        it keep working
    threshold: only items whose score is strictly greater than this value
        are considered

    Returns
    ----------------
    A dict ``{"SECTOR1": <best label>, "SECTOR2": <second-best label>}``.
    A slot is ``None`` when fewer items pass the threshold, and both are
    ``None`` when ``output`` is not a list of dicts.
    """
    top_labels = [None, None]

    if isinstance(output, list) and all(isinstance(item, dict) for item in output):
        # Keep items above the threshold, best score first. sorted() is
        # stable, so ties keep their input order.
        ranked = sorted(
            (item for item in output if item.get('score', 0) > threshold),
            key=lambda item: item.get('score', 0),
            reverse=True,
        )
        if not ranked:
            # This branch is only reached when nothing passed the threshold.
            logging.warning("Warning: No items above the threshold in the current list.")
        for position, item in enumerate(ranked[:2]):
            top_labels[position] = item.get('label')
    else:
        logging.error("Error: Inner data is not formatted correctly. Each item must be a dictionary.")

    return {"SECTOR1": top_labels[0], "SECTOR2": top_labels[1]}