# NOTE: the original capture included Hugging Face Spaces page chrome here
# ("Spaces: Sleeping") — it is not part of the program.
| import os | |
| import re | |
| import time | |
| import random | |
| import gradio as gr | |
| from huggingface_hub import InferenceClient | |
# NOTE: no .env loading or API-token retrieval actually happens in this file;
# the InferenceClient below relies on ambient Hugging Face credentials
# (e.g. an HF_TOKEN environment variable) if authentication is required.
# Initialize the InferenceClient (update the model as needed)
client = InferenceClient(
    model="microsoft/Phi-4-mini-reasoning"  # Change to your model if needed
)

# Optional: Enable scraping if your site is deployed.
ENABLE_SCRAPING = False  # Flip to True once SITE_URL points at a live deployment.
SITE_URL = "https://your-agri-future-site.com"

# Global variable to hold scraped content.
knowledge_base = ""  # Filled by scrape_site() below only when ENABLE_SCRAPING is True.
# --- Optional: Scraping Functionality ---
if ENABLE_SCRAPING:
    try:
        from selenium import webdriver
        from selenium.webdriver.chrome.options import Options
        from selenium.webdriver.common.by import By

        def scrape_site(url):
            """Fetch the text of the element with id="content" at *url*.

            Uses headless Chrome; returns an error string (rather than
            raising) if the element cannot be read.
            """
            options = Options()
            # NOTE(review): the `headless` attribute is deprecated in
            # Selenium 4+; prefer options.add_argument("--headless=new").
            options.headless = True  # Run browser in headless mode.
            driver = webdriver.Chrome(options=options)
            try:
                driver.get(url)
                # Use explicit waits in production; here we use a basic sleep.
                time.sleep(5)
                try:
                    # Customize the selector based on your site's HTML structure.
                    content_element = driver.find_element(By.ID, "content")
                    page_text = content_element.text
                except Exception as e:
                    page_text = "Error encountered during scraping: " + str(e)
            finally:
                # BUG FIX: quit in a finally block so the browser process is
                # closed even when get()/sleep raises; previously a failure
                # before quit() leaked the Chrome instance.
                driver.quit()
            return page_text

        knowledge_base = scrape_site(SITE_URL)
        print("Scraped knowledge base successfully.")
    except Exception as e:
        print("Scraping failed or Selenium is not configured:", e)
else:
    print("Scraping is disabled; proceeding without scraped site content.")
| # --- Multilingual Helpers --- | |
def is_greeting(query: str, lang: str) -> bool:
    """Return True when *query* opens with a greeting phrase for *lang*.

    Unknown language codes fall back to the English greeting list.
    Latin-script queries are lower-cased before matching; Amharic ("am")
    is matched as-is.
    """
    greetings = {
        "en": ["hello", "hi", "hey", "good morning", "good afternoon", "good evening"],
        "fr": ["bonjour", "salut", "coucou", "bonsoir"],
        "am": ["į°įį", "į°įį į„įį°įį", "į„įį“įµ"]
    }
    greet_list = greetings.get(lang, greetings["en"])
    # For languages using Latin script, convert the query to lower case.
    if lang != "am":
        query = query.lower()
    for greet in greet_list:
        if query.startswith(greet):
            rest = query[len(greet):]
            # BUG FIX: require a word boundary after the greeting so that
            # e.g. "history" no longer matches the greeting "hi".
            if not rest or not rest[0].isalpha():
                return True
    return False
def generate_dynamic_greeting(language: str) -> str:
    """Produce a fresh, agriculture-themed greeting in the requested language.

    Falls back to the English prompt for unknown language codes; the reply
    text is generated via the Hugging Face Inference API.
    """
    system_prompts = {
        "en": (
            "You are a friendly chatbot specializing in agriculture and agro-investment. "
            "A user just greeted you. Generate a warm, dynamic greeting message in English that is context-aware and encourages discussion about agriculture or agro-investment."
        ),
        "fr": (
            "Vous êtes un chatbot chaleureux spécialisé dans l'agriculture et les investissements agroalimentaires. "
            "Un utilisateur vient de vous saluer. Générez un message de salutation dynamique et chaleureux en français, en restant pertinent par rapport à l'agriculture ou aux investissements agroalimentaires."
        ),
        "am": (
            "į„įįµį į įį„įįį į į įįįįįµ įµįįį°-į¢įįįµ įįµį„ į£įįį« į»įµį¦įµ įįøįᢠ"
            "į°į įįį į į įįį į°įį įįį„įįµ į įµįįµįįįᢠ"
            "į į įįį į°įįį į„į įµįįįį įØįį į°įį įįį„įįµ įį„įØįµ į«įµįįį¢"
        )
    }
    chosen_prompt = system_prompts.get(language, system_prompts["en"])
    reply = client.chat_completion(
        [{"role": "system", "content": chosen_prompt}],
        max_tokens=128,
        stream=False,
        temperature=1,
        top_p=0.95,
    )
    try:
        text = reply.choices[0].message.content
    except AttributeError:
        # Some response shapes lack .choices — fall back to the raw repr.
        text = str(reply)
    return text.strip()
def generate_dynamic_out_of_scope_message(language: str) -> str:
    """Produce a polite out-of-scope reply in the requested language.

    Unknown language codes fall back to English; the message is generated
    via the Hugging Face Inference API.
    """
    system_prompts = {
        "en": (
            "You are a helpful chatbot specializing in agriculture and agro-investment. "
            "A user just asked a question that is not related to these topics. "
            "Generate a friendly, varied, and intelligent out-of-scope response in English that kindly encourages the user to ask about agriculture or agro-investment."
        ),
        "fr": (
            "Vous êtes un chatbot utile spécialisé dans l'agriculture et les investissements agroalimentaires. "
            "Un utilisateur vient de poser une question qui ne concerne pas ces sujets. "
            "Générez une réponse élégante, variée et intelligente en français pour indiquer que la question est hors de portée, en invitant l'utilisateur à poser une question sur l'agriculture ou les investissements agroalimentaires."
        ),
        "am": (
            "į„įįµį į įį„įįį į į įįįįįµ įµįįį°-į¢įįįµ įįµį„ į į°įį įØį°įį į»įµį¦įµ įįøįᢠ"
            "į°į įįį įįį„įį įįį įį įįįįįµ įµįįį°-į¢įįįµ į°į«įį į«įįį į„į«į į įµįįµįįįᢠ"
            "į į įįį į į°įį«į© įįį© įØįį įįį įįį„įįµ įį„įØįµ į«įµįįᤠį„į£į®įµį į°į įįįį įįį„įį įįį įį įįįįįµ į„į«įįį½ įįį įØį į«įįįį©į¢"
        )
    }
    chosen_prompt = system_prompts.get(language, system_prompts["en"])
    reply = client.chat_completion(
        [{"role": "system", "content": chosen_prompt}],
        max_tokens=128,
        stream=False,
        temperature=1,
        top_p=0.95,
    )
    try:
        text = reply.choices[0].message.content
    except AttributeError:
        # Some response shapes lack .choices — fall back to the raw repr.
        text = str(reply)
    return text.strip()
# Domain vocabulary for scope detection. Compiled once at import time into a
# single case-insensitive alternation; re.escape ensures literal characters
# (e.g. the hyphen in "agro-tech") are matched verbatim.
_DOMAIN_KEYWORDS = [
    "agriculture", "farming", "crop", "agro", "investment", "soil",
    "irrigation", "harvest", "organic", "sustainable", "agribusiness",
    "livestock", "agroalimentaire", "agriculture durable",
    "greenhouse", "horticulture", "pesticide", "fertilizer",
    "rural development", "food production", "crop yield", "farm equipment",
    "agronomy", "farming techniques", "organic farming", "agro-tech",
    "farm management", "agrifood"
]
_DOMAIN_PATTERN = re.compile(
    r"\b(?:" + "|".join(re.escape(kw) for kw in _DOMAIN_KEYWORDS) + r")\b",
    re.IGNORECASE,
)

def is_domain_query(query: str) -> bool:
    """Return True if *query* mentions an agriculture/agro-investment keyword.

    Matching is case-insensitive and anchored on word boundaries.
    """
    # One pre-compiled scan instead of a fresh re.search per keyword per call.
    return _DOMAIN_PATTERN.search(query) is not None
def retrieve_relevant_snippet(query: str, text: str, max_length: int = 300) -> str:
    """Return the first domain-relevant sentence of *text* that contains
    every whitespace-separated word of *query* (case-insensitively).

    Sentences are split on ., ? and !; a match longer than *max_length*
    is truncated with a trailing ellipsis. Returns "" when nothing matches.
    """
    needles = [word.lower() for word in query.split()]
    for raw_sentence in re.split(r'[.?!]', text):
        if not is_domain_query(raw_sentence):
            continue
        lowered = raw_sentence.lower()
        if any(needle not in lowered for needle in needles):
            continue
        snippet = raw_sentence.strip()
        if len(snippet) > max_length:
            return snippet[:max_length] + "..."
        return snippet
    return ""
| # --- Chat Assistant Response Function --- | |
def respond(message, history: list, system_message, max_tokens, temperature, top_p, language):
    """Gradio chat handler: yield the (cumulative) streamed answer to *message*.

    Greetings and out-of-domain questions short-circuit to a single generated
    reply; otherwise the completion is built from the system message, the
    prior turns in *history*, and (if available) a snippet retrieved from the
    scraped knowledge base.
    """
    # Check for a greeting.
    if is_greeting(message, language):
        yield generate_dynamic_greeting(language)
        return
    # If query is out of domain, generate an out-of-scope message.
    if not is_domain_query(message):
        yield generate_dynamic_out_of_scope_message(language)
        return
    # Build conversation context from the system message and conversation history.
    messages_list = [{"role": "system", "content": system_message}]
    # NOTE(review): assumes tuple-style (user, assistant) history pairs — the
    # legacy Gradio ChatInterface format; confirm against the gradio version in use.
    for user_msg, assistant_msg in history:
        if user_msg:
            messages_list.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages_list.append({"role": "assistant", "content": assistant_msg})
    # Optionally add a relevant snippet from the scraped content (if available).
    if knowledge_base:
        snippet = retrieve_relevant_snippet(message, knowledge_base)
        if snippet:
            retrieval_context = f"Reference from Agri Future Investment platform: {snippet}"
            messages_list.insert(0, {"role": "system", "content": retrieval_context})
    messages_list.append({"role": "user", "content": message})
    # Generate the assistant's answer by streaming responses.
    response_text = ""
    for partial_response in client.chat_completion(
        messages_list,
        # BUG FIX: honor the "Max New Tokens" slider; this was hard-coded to
        # 1024, silently ignoring the max_tokens value supplied by the UI.
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        if partial_response.choices and partial_response.choices[0].delta:
            token = partial_response.choices[0].delta.content
            if token:
                response_text += token
                yield response_text
# --- Gradio Chat Interface ---
# Each additional input's value is passed positionally to respond() after
# (message, history), in the order listed here.
demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        gr.Textbox(
            value="You are AgriFutureBot, a specialized assistant for agriculture and agro-investment insights.",
            label="System Message"
        ),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max New Tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (Nucleus Sampling)"),
        gr.Dropdown(choices=["en", "fr", "am"], value="en", label="Language")
    ],
)

# Launch the app only when executed as a script (not when imported).
if __name__ == "__main__":
    demo.launch()