mtyrrell's picture
Fresh start with LFS for images
bc92a1b
raw
history blame
3.49 kB
# Helper functions for pipeline
import logging
from collections import defaultdict, namedtuple, Counter
from datetime import datetime, timedelta
from typing import List, Dict, Any, Optional

import torch
from openai import OpenAI
from transformers import pipeline

from modules.models import ConceptClassify
from modules.prompts import prompt_concept
from modules.utils import setup_logging
logger = setup_logging()
def call_structured(client: OpenAI, deployment: str, system_prompt: str, user_prompt: str,
                    response_model: type,
                    logger: logging.Logger) -> Optional[Dict[str, Any]]:
    """Call (Azure) OpenAI with structured output and return the parsed result.

    Args:
        client: Client whose ``responses.parse`` method performs the request.
        deployment: Model / deployment name. A fixed set of names
            (o4-mini, o3, gpt-5, gpt-5-mini, gpt-5-nano) is sent
            ``reasoning={"effort": "low"}``; every other name is sent
            ``temperature=0`` instead.
        system_prompt: System message for the request. When empty, a generic
            grant-assessment prompt is used in its place.
        user_prompt: User message for the request.
        response_model: Class passed to the API as ``text_format``. The parsed
            output is expected to expose ``model_dump()`` (e.g. a pydantic
            model class).
        logger: Logger that receives the error message when the call fails.

    Returns:
        The ``model_dump()`` of the parsed output, or ``None`` when the request
        raises or yields no parsed output. This function never raises for API
        failures; callers must handle ``None``.
    """
    default_system_prompt = "You are assessing grant applications for an open funding call."
    reasoning_deployments = ('o4-mini', 'o3', "gpt-5", "gpt-5-mini", "gpt-5-nano")

    # Resolve the name up front and defensively: the error handler below must
    # never raise itself, even if response_model has no __name__ (e.g. None).
    model_name = getattr(response_model, "__name__", repr(response_model))

    request = {
        "model": deployment,
        "input": [
            # Honour the caller's system prompt; fall back only when it is empty.
            {"role": "system", "content": system_prompt or default_system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        "text_format": response_model,
    }
    # The two request variants differ in exactly one keyword.
    if deployment in reasoning_deployments:
        request["reasoning"] = {"effort": "low"}
    else:
        request["temperature"] = 0

    try:
        response = client.responses.parse(**request)
        parsed = response.output_parsed
        if parsed is None:
            logger.error(f"Azure OpenAI returned no parsed output for {model_name}")
            return None
        return parsed.model_dump()
    except Exception as e:
        # Deliberate best-effort boundary: log and signal failure with None.
        logger.error(f"Error calling Azure OpenAI for {model_name}: {e}")
        return None
# Not used - the duplicate-detection results were too poor to rely on, so this check is disabled
# def check_duplicate_concepts(client, deployment, concept_id: str, organization: str, concept_profile: str, df) -> bool:
# """
# Check for duplicate concepts within the same organization using Azure OpenAI
# Args:
# client: AzureOpenAI client instance
# deployment: Azure OpenAI deployment name
# concept_id: ID of the current concept being checked
# organization: Organization name
# concept_profile: Text description of the concept to check
# df: DataFrame containing all application data
# Returns:
# Boolean classification result
# """
# # Remove current concept from the dataframe
# df_check = df[df['id'] != concept_id].copy()
# # Get other concepts from the same organization
# org_concepts = df_check[df_check['org_renamed'] == organization]
# other_concepts = org_concepts['scope_txt'].tolist()
# # If no other concepts from this organization, return False
# if len(other_concepts) == 0:
# return False
# logger.info(f"Checking duplicates for concept ID {concept_id} from organization {organization} against {len(other_concepts)} other concept(s).")
# logger.info(f"Scope text {concept_profile}")
# # Construct prompt
# prompt = prompt_concept(concept_profile, other_concepts)
# response = call_structured(client, deployment, prompt, concept_profile, ConceptClassify, logger)
# check = response['classification']
# logger.info(f"Duplicate check response for concept ID {concept_id}: {check}")
# if check == "YES":
# return True
# return False