Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import torch | |
| from unsloth import FastLanguageModel | |
| import logging | |
| # Setup logging | |
| logging.basicConfig(level=logging.INFO) | |
| logger = logging.getLogger(__name__) | |
| def load_model(): | |
| """Load fine-tuned model.""" | |
| logger.info("Loading model...") | |
| model, tokenizer = FastLanguageModel.from_pretrained( | |
| "boolean_model_merged", | |
| max_seq_length=2048, | |
| dtype=None, # Auto-detect | |
| load_in_4bit=True | |
| ) | |
| FastLanguageModel.for_inference(model) | |
| return model, tokenizer | |
| def format_prompt(query): | |
| """Format query with instruction prompt.""" | |
| return f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request. | |
| ### Instruction: | |
| Convert this natural language query into a boolean search query by following these rules: | |
| 1. FIRST: Remove all meta-terms from this list (they should NEVER appear in output): | |
| - articles, papers, research, studies | |
| - examining, investigating, analyzing | |
| - findings, documents, literature | |
| - publications, journals, reviews | |
| Example: "Research examining X" β just "X" | |
| 2. SECOND: Remove generic implied terms that don't add search value: | |
| - Remove words like "practices," "techniques," "methods," "approaches," "strategies" | |
| - Remove words like "impacts," "effects," "influences," "role," "applications" | |
| - For example: "sustainable agriculture practices" β "sustainable agriculture" | |
| - For example: "teaching methodologies" β "teaching" | |
| - For example: "leadership styles" β "leadership" | |
| 3. THEN: Format the remaining terms: | |
| CRITICAL QUOTING RULES: | |
| - Multi-word phrases MUST ALWAYS be in quotes - NO EXCEPTIONS | |
| - Examples of correct quoting: | |
| - Wrong: machine learning AND deep learning | |
| - Right: "machine learning" AND "deep learning" | |
| - Wrong: natural language processing | |
| - Right: "natural language processing" | |
| - Single words must NEVER have quotes (e.g., science, research, learning) | |
| - Use AND to connect required concepts | |
| - Use OR with parentheses for alternatives (e.g., ("soil health" OR biodiversity)) | |
| Example conversions showing proper quoting: | |
| "Research on machine learning for natural language processing" | |
| β "machine learning" AND "natural language processing" | |
| "Studies examining anxiety depression stress in workplace" | |
| β (anxiety OR depression OR stress) AND workplace | |
| "Articles about deep learning impact on computer vision" | |
| β "deep learning" AND "computer vision" | |
| "Research on sustainable agriculture practices and their impact on soil health or biodiversity" | |
| β "sustainable agriculture" AND ("soil health" OR biodiversity) | |
| "Articles about effective teaching methods for second language acquisition" | |
| β teaching AND "second language acquisition" | |
| ### Input: | |
| {query} | |
| ### Response: | |
| """ | |
| def get_boolean_query(query): | |
| """Generate boolean query from natural language.""" | |
| prompt = format_prompt(query) | |
| device = "cuda" if torch.cuda.is_available() else "cpu" | |
| # Tokenize and generate response | |
| inputs = tokenizer(prompt, return_tensors="pt").to(device) | |
| outputs = model.generate( | |
| **inputs, | |
| max_new_tokens=32, | |
| do_sample=False, | |
| use_cache=True, | |
| eos_token_id=tokenizer.eos_token_id | |
| ) | |
| # Extract response section and clean output | |
| full_response = tokenizer.decode(outputs[0]) | |
| response = full_response.split("### Response:")[-1].strip() | |
| # Remove end of text token if present | |
| cleaned_response = response.replace("<|end_of_text|>", "").strip() | |
| return cleaned_response | |
| # Load model globally | |
| logger.info("Initializing model...") | |
| model, tokenizer = load_model() | |
| logger.info("Model loaded successfully") | |
| # Example queries using more natural language | |
| examples = [ | |
| # Testing removal of meta-terms | |
| ["Find research papers examining the long-term effects of meditation on brain structure"], | |
| # Testing removal of generic implied terms (practices, techniques, methods) | |
| ["Articles about deep learning techniques for natural language processing tasks"], | |
| # Testing removal of impact/effect terms | |
| ["Studies on the impact of early childhood nutrition on cognitive development"], | |
| # Testing handling of technology applications | |
| ["Information on virtual reality applications in architectural design and urban planning"], | |
| # Testing proper OR relationship with parentheses | |
| ["Research on electric vehicles adoption in urban environments or rural communities"], | |
| # Testing proper quoting of multi-word concepts only | |
| ["Articles on biodiversity loss in coral reefs and rainforest ecosystems"], | |
| # Testing removal of strategy/approach terms | |
| ["Studies about different teaching approaches for children with learning disabilities"], | |
| # Testing complex OR relationships | |
| ["Research examining social media influence on political polarization or public discourse"], | |
| # Testing implied terms in specific industries | |
| ["Articles about implementation strategies for blockchain in supply chain management or financial services"], | |
| # Testing qualifiers that don't add search value | |
| ["Research on effective leadership styles in multicultural organizations"], | |
| # Testing removal of multiple implied terms | |
| ["Studies on the effects of microplastic pollution techniques on marine ecosystem health"], | |
| # Testing domain-specific implied terms | |
| ["Articles about successful cybersecurity protection methods for critical infrastructure"], | |
| # Testing generalized vs specific concepts | |
| ["Research papers on quantum computing algorithms for cryptography or optimization problems"], | |
| # Testing implied terms in outcome descriptions | |
| ["Studies examining the relationship between sleep quality and academic performance outcomes"], | |
| # Testing complex nesting of concepts | |
| ["Articles about renewable energy integration challenges in developing countries or island nations"] | |
| ] | |
| # Create Gradio interface with metadata for deployment | |
| title = "Boolean Search Query Generator" | |
| description = "Convert natural language queries into boolean search expressions. The model will remove search-related terms (like 'articles', 'research', etc.), handle generic implied terms (like 'practices', 'methods'), and format the core concepts using proper boolean syntax." | |
| demo = gr.Interface( | |
| fn=get_boolean_query, | |
| inputs=[ | |
| gr.Textbox( | |
| label="Enter your natural language query", | |
| placeholder="e.g., I'm looking for information about climate change and renewable energy" | |
| ) | |
| ], | |
| outputs=gr.Textbox(label="Boolean Search Query"), | |
| title=title, | |
| description=description, | |
| examples=examples, | |
| theme=gr.themes.Soft() | |
| ) | |
| if __name__ == "__main__": | |
| demo.launch() | |