Spaces:

mtyrrell
/

prefilter_app

Sleeping

App Files Files Community

prefilter_app / modules /llm.py

mtyrrell

Fresh start with LFS for images

bc92a1b 3 months ago

raw

history blame

3.49 kB

	# Helper functions for pipeline
	import logging
	from collections import defaultdict, namedtuple, Counter
	from datetime import datetime, timedelta
	from typing import List, Dict, Any, Optional

	import torch
	from openai import OpenAI
	from transformers import pipeline

	from modules.models import ConceptClassify
	from modules.prompts import prompt_concept
	from modules.utils import setup_logging

	# Module-level logger created by the project's setup_logging() helper
	# (imported from modules.utils above). Note that call_structured() takes its
	# own `logger` parameter, which shadows this name inside that function.
	logger = setup_logging()

	# Deployments that are called with a low "reasoning" effort instead of
	# temperature=0. Kept as a tuple so membership is a plain equality scan.
	_REASONING_DEPLOYMENTS = ("o4-mini", "o3", "gpt-5", "gpt-5-mini", "gpt-5-nano")

	# System prompt used when the caller does not supply one.
	_DEFAULT_SYSTEM_PROMPT = "You are assessing grant applications for an open funding call."


	def call_structured(client: "OpenAI", deployment: str, system_prompt: str, user_prompt: str,
	                    response_model: type,
	                    logger: logging.Logger) -> Optional[Dict[str, Any]]:
	    """Call the OpenAI Responses API and return the parsed structured output.

	    Args:
	        client: Client object exposing ``responses.parse``.
	        deployment: Model / deployment name. Names listed in
	            ``_REASONING_DEPLOYMENTS`` are called with
	            ``reasoning={"effort": "low"}``; all others with ``temperature=0``.
	        system_prompt: System message for the request. Previously this
	            argument was silently overwritten; it is now honoured, and the
	            generic grant-assessment prompt is used only when it is empty.
	        user_prompt: User message for the request.
	        response_model: Class passed to the API as ``text_format``; the parsed
	            result is expected to provide ``model_dump()``.
	        logger: Logger that receives error messages.

	    Returns:
	        The parsed output as a dict (``model_dump()``), or ``None`` when the
	        call fails or the response carries no parsed output.
	    """
	    if not system_prompt:
	        system_prompt = _DEFAULT_SYSTEM_PROMPT

	    # Resolve the model name defensively so the error path can never raise
	    # itself (e.g. when response_model is None or not a class).
	    model_name = getattr(response_model, "__name__", repr(response_model))

	    try:
	        request: Dict[str, Any] = {
	            "model": deployment,
	            "input": [
	                {"role": "system", "content": system_prompt},
	                {"role": "user", "content": user_prompt},
	            ],
	            "text_format": response_model,
	        }
	        if deployment in _REASONING_DEPLOYMENTS:
	            request["reasoning"] = {"effort": "low"}
	        else:
	            request["temperature"] = 0

	        response = client.responses.parse(**request)
	        parsed = response.output_parsed

	        # No parsed payload: report it explicitly instead of relying on an
	        # AttributeError from None.model_dump().
	        if parsed is None:
	            logger.error(
	                "Error calling Azure OpenAI for %s: %s",
	                model_name,
	                "response contained no parsed output",
	            )
	            return None

	        return parsed.model_dump()

	    except Exception as exc:
	        # Best-effort boundary: callers receive None and must handle it.
	        logger.error("Error calling Azure OpenAI for %s: %s", model_name, exc)
	        return None


	# Not used - the duplicate-detection results were poor, so this check is disabled.
	# def check_duplicate_concepts(client, deployment, concept_id: str, organization: str, concept_profile: str, df) -> bool:
	# """
	# Check for duplicate concepts within the same organization using Azure OpenAI

	# Args:
	# client: AzureOpenAI client instance
	# deployment: Azure OpenAI deployment name
	# concept_id: ID of the current concept being checked
	# organization: Organization name
	# concept_profile: Text description of the concept to check
	# df: DataFrame containing all application data

	# Returns:
	# Boolean classification result
	# """

	# # Remove current concept from the dataframe
	# df_check = df[df['id'] != concept_id].copy()

	# # Get other concepts from the same organization
	# org_concepts = df_check[df_check['org_renamed'] == organization]
	# other_concepts = org_concepts['scope_txt'].tolist()

	# # If no other concepts from this organization, return False
	# if len(other_concepts) == 0:
	# return False

	# logger.info(f"Checking duplicates for concept ID {concept_id} from organization {organization} against {len(other_concepts)} other concept(s).")
	# logger.info(f"Scope text {concept_profile}")
	# # Construct prompt
	# prompt = prompt_concept(concept_profile, other_concepts)

	# response = call_structured(client, deployment, prompt, concept_profile, ConceptClassify, logger)

	# check = response['classification']
	# logger.info(f"Duplicate check response for concept ID {concept_id}: {check}")
	# if check == "YES":
	# return True
	# return False