# NOTE: the original capture included Hugging Face Spaces page chrome here
# ("Spaces: Sleeping") — it is not part of the program.
| import os | |
| import re | |
| import time | |
| import random | |
| import gradio as gr | |
| from huggingface_hub import InferenceClient | |
# NOTE: no .env loading or API-token retrieval actually happens in this file;
# the InferenceClient below relies on ambient Hugging Face credentials
# (e.g. an HF_TOKEN environment variable) if authentication is required.
# Initialize the InferenceClient (update the model as needed)
client = InferenceClient(
    model="microsoft/Phi-4-mini-reasoning"  # Change to your model if needed
)

# Optional: Enable scraping if your site is deployed.
ENABLE_SCRAPING = False  # Flip to True once SITE_URL points at a live deployment.
SITE_URL = "https://your-agri-future-site.com"

# Global variable to hold scraped content.
knowledge_base = ""  # Filled by scrape_site() below only when ENABLE_SCRAPING is True.
# --- Optional: Scraping Functionality ---
if ENABLE_SCRAPING:
    try:
        from selenium import webdriver
        from selenium.webdriver.chrome.options import Options
        from selenium.webdriver.common.by import By

        def scrape_site(url):
            """Fetch the text of the element with id="content" at *url*.

            Uses headless Chrome; returns an error string (rather than
            raising) if the element cannot be read.
            """
            options = Options()
            # NOTE(review): the `headless` attribute is deprecated in
            # Selenium 4+; prefer options.add_argument("--headless=new").
            options.headless = True  # Run browser in headless mode.
            driver = webdriver.Chrome(options=options)
            try:
                driver.get(url)
                # Use explicit waits in production; here we use a basic sleep.
                time.sleep(5)
                try:
                    # Customize the selector based on your site's HTML structure.
                    content_element = driver.find_element(By.ID, "content")
                    page_text = content_element.text
                except Exception as e:
                    page_text = "Error encountered during scraping: " + str(e)
            finally:
                # BUG FIX: quit in a finally block so the browser process is
                # closed even when get()/sleep raises; previously a failure
                # before quit() leaked the Chrome instance.
                driver.quit()
            return page_text

        knowledge_base = scrape_site(SITE_URL)
        print("Scraped knowledge base successfully.")
    except Exception as e:
        print("Scraping failed or Selenium is not configured:", e)
else:
    print("Scraping is disabled; proceeding without scraped site content.")
| # --- Multilingual Helpers --- | |
def is_greeting(query: str, lang: str) -> bool:
    """Return True when *query* opens with a greeting phrase for *lang*.

    Unknown language codes fall back to the English greeting list.
    Latin-script queries are lower-cased before matching; Amharic ("am")
    is matched as-is.
    """
    greetings = {
        "en": ["hello", "hi", "hey", "good morning", "good afternoon", "good evening"],
        "fr": ["bonjour", "salut", "coucou", "bonsoir"],
        "am": ["į°įį", "į°įį į„įį°įį", "į„įį“įµ"]
    }
    greet_list = greetings.get(lang, greetings["en"])
    # For languages using Latin script, convert the query to lower case.
    if lang != "am":
        query = query.lower()
    for greet in greet_list:
        if query.startswith(greet):
            rest = query[len(greet):]
            # BUG FIX: require a word boundary after the greeting so that
            # e.g. "history" no longer matches the greeting "hi".
            if not rest or not rest[0].isalpha():
                return True
    return False
def generate_dynamic_greeting(language: str) -> str:
    """Produce a fresh, agriculture-themed greeting in the requested language.

    Falls back to the English prompt for unknown language codes; the reply
    text is generated via the Hugging Face Inference API.
    """
    system_prompts = {
        "en": (
            "You are a friendly chatbot specializing in agriculture and agro-investment. "
            "A user just greeted you. Generate a warm, dynamic greeting message in English that is context-aware and encourages discussion about agriculture or agro-investment."
        ),
        "fr": (
            "Vous êtes un chatbot chaleureux spécialisé dans l'agriculture et les investissements agroalimentaires. "
            "Un utilisateur vient de vous saluer. Générez un message de salutation dynamique et chaleureux en français, en restant pertinent par rapport à l'agriculture ou aux investissements agroalimentaires."
        ),
        "am": (
            "į„įįµį į įį„įįį į į įįįįįµ įµįįį°-į¢įįįµ įįµį„ į£įįį« į»įµį¦įµ įįøįᢠ"
            "į°į įįį į į įįį į°įį įįį„įįµ į įµįįµįįįᢠ"
            "į į įįį į°įįį į„į įµįįįį įØįį į°įį įįį„įįµ įį„įØįµ į«įµįįį¢"
        )
    }
    chosen_prompt = system_prompts.get(language, system_prompts["en"])
    reply = client.chat_completion(
        [{"role": "system", "content": chosen_prompt}],
        max_tokens=128,
        stream=False,
        temperature=1,
        top_p=0.95,
    )
    try:
        text = reply.choices[0].message.content
    except AttributeError:
        # Some response shapes lack .choices — fall back to the raw repr.
        text = str(reply)
    return text.strip()
def generate_dynamic_out_of_scope_message(language: str) -> str:
    """Produce a polite out-of-scope reply in the requested language.

    Unknown language codes fall back to English; the message is generated
    via the Hugging Face Inference API.
    """
    system_prompts = {
        "en": (
            "You are a helpful chatbot specializing in agriculture and agro-investment. "
            "A user just asked a question that is not related to these topics. "
            "Generate a friendly, varied, and intelligent out-of-scope response in English that kindly encourages the user to ask about agriculture or agro-investment."
        ),
        "fr": (
            "Vous êtes un chatbot utile spécialisé dans l'agriculture et les investissements agroalimentaires. "
            "Un utilisateur vient de poser une question qui ne concerne pas ces sujets. "
            "Générez une réponse élégante, variée et intelligente en français pour indiquer que la question est hors de portée, en invitant l'utilisateur à poser une question sur l'agriculture ou les investissements agroalimentaires."
        ),
        "am": (
            "į„įįµį į įį„įįį į į įįįįįµ įµįįį°-į¢įįįµ įįµį„ į į°įį įØį°įį į»įµį¦įµ įįøįᢠ"
            "į°į įįį įįį„įį įįį įį įįįįįµ įµįįį°-į¢įįįµ į°į«įį į«įįį į„į«į į įµįįµįįįᢠ"
            "į į įįį į į°įį«į© įįį© įØįį įįį įįį„įįµ įį„įØįµ į«įµįįᤠį„į£į®įµį į°į įįįį įįį„įį įįį įį įįįįįµ į„į«įįį½ įįį įØį į«įįįį©į¢"
        )
    }
    chosen_prompt = system_prompts.get(language, system_prompts["en"])
    reply = client.chat_completion(
        [{"role": "system", "content": chosen_prompt}],
        max_tokens=128,
        stream=False,
        temperature=1,
        top_p=0.95,
    )
    try:
        text = reply.choices[0].message.content
    except AttributeError:
        # Some response shapes lack .choices — fall back to the raw repr.
        text = str(reply)
    return text.strip()
# Domain vocabulary for scope detection. Compiled once at import time into a
# single case-insensitive alternation; re.escape ensures literal characters
# (e.g. the hyphen in "agro-tech") are matched verbatim.
_DOMAIN_KEYWORDS = [
    "agriculture", "farming", "crop", "agro", "investment", "soil",
    "irrigation", "harvest", "organic", "sustainable", "agribusiness",
    "livestock", "agroalimentaire", "agriculture durable",
    "greenhouse", "horticulture", "pesticide", "fertilizer",
    "rural development", "food production", "crop yield", "farm equipment",
    "agronomy", "farming techniques", "organic farming", "agro-tech",
    "farm management", "agrifood"
]
_DOMAIN_PATTERN = re.compile(
    r"\b(?:" + "|".join(re.escape(kw) for kw in _DOMAIN_KEYWORDS) + r")\b",
    re.IGNORECASE,
)

def is_domain_query(query: str) -> bool:
    """Return True if *query* mentions an agriculture/agro-investment keyword.

    Matching is case-insensitive and anchored on word boundaries.
    """
    # One pre-compiled scan instead of a fresh re.search per keyword per call.
    return _DOMAIN_PATTERN.search(query) is not None
def retrieve_relevant_snippet(query: str, text: str, max_length: int = 300) -> str:
    """Return the first domain-relevant sentence of *text* that contains
    every whitespace-separated word of *query* (case-insensitively).

    Sentences are split on ., ? and !; a match longer than *max_length*
    is truncated with a trailing ellipsis. Returns "" when nothing matches.
    """
    needles = [word.lower() for word in query.split()]
    for raw_sentence in re.split(r'[.?!]', text):
        if not is_domain_query(raw_sentence):
            continue
        lowered = raw_sentence.lower()
        if any(needle not in lowered for needle in needles):
            continue
        snippet = raw_sentence.strip()
        if len(snippet) > max_length:
            return snippet[:max_length] + "..."
        return snippet
    return ""
| # --- Chat Assistant Response Function --- | |
def respond(message, history: list, system_message, max_tokens, temperature, top_p, language):
    """Gradio chat handler: yield the (cumulative) streamed answer to *message*.

    Greetings and out-of-domain questions short-circuit to a single generated
    reply; otherwise the completion is built from the system message, the
    prior turns in *history*, and (if available) a snippet retrieved from the
    scraped knowledge base.
    """
    # Check for a greeting.
    if is_greeting(message, language):
        yield generate_dynamic_greeting(language)
        return
    # If query is out of domain, generate an out-of-scope message.
    if not is_domain_query(message):
        yield generate_dynamic_out_of_scope_message(language)
        return
    # Build conversation context from the system message and conversation history.
    messages_list = [{"role": "system", "content": system_message}]
    # NOTE(review): assumes tuple-style (user, assistant) history pairs — the
    # legacy Gradio ChatInterface format; confirm against the gradio version in use.
    for user_msg, assistant_msg in history:
        if user_msg:
            messages_list.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages_list.append({"role": "assistant", "content": assistant_msg})
    # Optionally add a relevant snippet from the scraped content (if available).
    if knowledge_base:
        snippet = retrieve_relevant_snippet(message, knowledge_base)
        if snippet:
            retrieval_context = f"Reference from Agri Future Investment platform: {snippet}"
            messages_list.insert(0, {"role": "system", "content": retrieval_context})
    messages_list.append({"role": "user", "content": message})
    # Generate the assistant's answer by streaming responses.
    response_text = ""
    for partial_response in client.chat_completion(
        messages_list,
        # BUG FIX: honor the "Max New Tokens" slider; this was hard-coded to
        # 1024, silently ignoring the max_tokens value supplied by the UI.
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        if partial_response.choices and partial_response.choices[0].delta:
            token = partial_response.choices[0].delta.content
            if token:
                response_text += token
                yield response_text
# --- Gradio Chat Interface ---
# Each additional input's value is passed positionally to respond() after
# (message, history), in the order listed here.
demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        gr.Textbox(
            value="You are AgriFutureBot, a specialized assistant for agriculture and agro-investment insights.",
            label="System Message"
        ),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max New Tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (Nucleus Sampling)"),
        gr.Dropdown(choices=["en", "fr", "am"], value="en", label="Language")
    ],
)

# Launch the app only when executed as a script (not when imported).
if __name__ == "__main__":
    demo.launch()