Spaces:

Zwounds
/

Boolean_Search_Query_Model

Runtime error

App Files Files Community

Boolean_Search_Query_Model / demo.py

Zwounds

Upload folder using huggingface_hub

39838a2 verified 9 months ago

raw

history blame

6.89 kB

	import gradio as gr
	import torch
	from unsloth import FastLanguageModel
	import logging

	# Create this module's logger, then configure root logging at INFO level
	logger = logging.getLogger(__name__)
	logging.basicConfig(level=logging.INFO)

	def load_model(model_name="boolean_model_merged", max_seq_length=2048, load_in_4bit=True):
	    """Load a fine-tuned model together with its tokenizer.

	    The arguments default to the values this demo previously hard-coded, so
	    calling ``load_model()`` with no arguments behaves as before.

	    Args:
	        model_name: Name or path handed to ``FastLanguageModel.from_pretrained``.
	        max_seq_length: Maximum sequence length requested from the loader.
	        load_in_4bit: Whether to request 4-bit loading from the loader.

	    Returns:
	        The ``(model, tokenizer)`` pair returned by ``from_pretrained``, after
	        ``FastLanguageModel.for_inference`` has been called on the model.
	    """
	    logger.info("Loading model...")
	    model, tokenizer = FastLanguageModel.from_pretrained(
	        model_name,
	        max_seq_length=max_seq_length,
	        dtype=None,  # Auto-detect
	        load_in_4bit=load_in_4bit,
	    )
	    FastLanguageModel.for_inference(model)
	    return model, tokenizer

	def format_prompt(query):
	    """Build the full instruction prompt for one user query.

	    The fixed instruction text (conversion rules plus worked examples) is kept
	    in a template, and ``query`` is substituted into its ``### Input:`` section.
	    The returned text closes with the ``### Response:`` header so the model's
	    continuation is the answer.
	    """
	    # ``{query}`` is the only replacement field in the template.
	    template = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

	### Instruction:
	Convert this natural language query into a boolean search query by following these rules:

	1. FIRST: Remove all meta-terms from this list (they should NEVER appear in output):
	- articles, papers, research, studies
	- examining, investigating, analyzing
	- findings, documents, literature
	- publications, journals, reviews
	Example: "Research examining X" → just "X"

	2. SECOND: Remove generic implied terms that don't add search value:
	- Remove words like "practices," "techniques," "methods," "approaches," "strategies"
	- Remove words like "impacts," "effects," "influences," "role," "applications"
	- For example: "sustainable agriculture practices" → "sustainable agriculture"
	- For example: "teaching methodologies" → "teaching"
	- For example: "leadership styles" → "leadership"

	3. THEN: Format the remaining terms:
	CRITICAL QUOTING RULES:
	- Multi-word phrases MUST ALWAYS be in quotes - NO EXCEPTIONS
	- Examples of correct quoting:
	- Wrong: machine learning AND deep learning
	- Right: "machine learning" AND "deep learning"
	- Wrong: natural language processing
	- Right: "natural language processing"
	- Single words must NEVER have quotes (e.g., science, research, learning)
	- Use AND to connect required concepts
	- Use OR with parentheses for alternatives (e.g., ("soil health" OR biodiversity))

	Example conversions showing proper quoting:
	"Research on machine learning for natural language processing"
	→ "machine learning" AND "natural language processing"

	"Studies examining anxiety depression stress in workplace"
	→ (anxiety OR depression OR stress) AND workplace

	"Articles about deep learning impact on computer vision"
	→ "deep learning" AND "computer vision"

	"Research on sustainable agriculture practices and their impact on soil health or biodiversity"
	→ "sustainable agriculture" AND ("soil health" OR biodiversity)

	"Articles about effective teaching methods for second language acquisition"
	→ teaching AND "second language acquisition"

	### Input:
	{query}

	### Response:
	"""
	    return template.format(query=query)

	def get_boolean_query(query):
	    """Generate a boolean search query from a natural language query.

	    Relies on the module-level ``model`` and ``tokenizer`` globals and on
	    ``format_prompt`` to build the instruction prompt.

	    Args:
	        query: Natural language text entered by the user.

	    Returns:
	        The model's answer as a stripped string, or ``""`` when ``query`` is
	        empty or whitespace-only (no generation is run in that case).
	    """
	    # Nothing to convert: skip the (expensive) generation call entirely.
	    if not query or not query.strip():
	        return ""

	    prompt = format_prompt(query)
	    device = "cuda" if torch.cuda.is_available() else "cpu"

	    # Tokenize and generate response (greedy decoding, at most 32 new tokens)
	    inputs = tokenizer(prompt, return_tensors="pt").to(device)
	    outputs = model.generate(
	        **inputs,
	        max_new_tokens=32,
	        do_sample=False,
	        use_cache=True,
	        eos_token_id=tokenizer.eos_token_id,
	    )

	    # The generated sequence starts with the prompt tokens; decode only what
	    # comes after them so the prompt text never has to be parsed back out.
	    prompt_length = inputs["input_ids"].shape[1]
	    generated_tokens = outputs[0][prompt_length:]
	    response = tokenizer.decode(generated_tokens, skip_special_tokens=True)

	    # Defensive: if the model repeats the marker, keep only the text after its
	    # last occurrence, as the previous implementation did.
	    response = response.split("### Response:")[-1]

	    # Remove the end-of-text token if it survived decoding. The earlier literal
	    # had backslash-escaped pipes and therefore never matched the real token.
	    return response.replace("<|end_of_text|>", "").strip()

	# Load the model and tokenizer once at import time; they are kept as
	# module-level globals that get_boolean_query() reads on each call.
	logger.info("Initializing model...")
	model, tokenizer = load_model()
	logger.info("Model loaded successfully")

	# Sample natural-language queries for the UI. Each one is wrapped in its own
	# one-element list (one value for the interface's single input).
	examples = [
	    [text]
	    for text in (
	        # Meta-term removal
	        "Find research papers examining the long-term effects of meditation on brain structure",
	        # Generic implied terms (practices, techniques, methods)
	        "Articles about deep learning techniques for natural language processing tasks",
	        # Impact/effect terms
	        "Studies on the impact of early childhood nutrition on cognitive development",
	        # Technology applications
	        "Information on virtual reality applications in architectural design and urban planning",
	        # OR relationship with parentheses
	        "Research on electric vehicles adoption in urban environments or rural communities",
	        # Quoting of multi-word concepts only
	        "Articles on biodiversity loss in coral reefs and rainforest ecosystems",
	        # Strategy/approach terms
	        "Studies about different teaching approaches for children with learning disabilities",
	        # Complex OR relationships
	        "Research examining social media influence on political polarization or public discourse",
	        # Implied terms in specific industries
	        "Articles about implementation strategies for blockchain in supply chain management or financial services",
	        # Qualifiers that don't add search value
	        "Research on effective leadership styles in multicultural organizations",
	        # Multiple implied terms at once
	        "Studies on the effects of microplastic pollution techniques on marine ecosystem health",
	        # Domain-specific implied terms
	        "Articles about successful cybersecurity protection methods for critical infrastructure",
	        # Generalized vs specific concepts
	        "Research papers on quantum computing algorithms for cryptography or optimization problems",
	        # Implied terms in outcome descriptions
	        "Studies examining the relationship between sleep quality and academic performance outcomes",
	        # Complex nesting of concepts
	        "Articles about renewable energy integration challenges in developing countries or island nations",
	    )
	]


	# Page metadata shown by the Gradio interface
	title = "Boolean Search Query Generator"
	description = (
	    "Convert natural language queries into boolean search expressions. "
	    "The model will remove search-related terms (like 'articles', 'research', etc.), "
	    "handle generic implied terms (like 'practices', 'methods'), "
	    "and format the core concepts using proper boolean syntax."
	)

	# One text input (the natural language query) mapped through
	# get_boolean_query to one text output (the boolean query).
	demo = gr.Interface(
	    fn=get_boolean_query,
	    title=title,
	    description=description,
	    theme=gr.themes.Soft(),
	    inputs=[
	        gr.Textbox(
	            placeholder="e.g., I'm looking for information about climate change and renewable energy",
	            label="Enter your natural language query",
	        ),
	    ],
	    outputs=gr.Textbox(label="Boolean Search Query"),
	    examples=examples,
	)

	# Launch the app only when this file is executed as a script
	if __name__ == "__main__":
	    demo.launch()