Spaces:
Sleeping
Sleeping
import os

import chromadb
import gradio as gr
import torch
from chromadb.utils import embedding_functions
from datasets import load_dataset
from sentence_transformers import SentenceTransformer
from transformers import AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokenizer
# Initialize a persistent ChromaDB client backed by the on-disk store
# shipped with this Space.
client = chromadb.PersistentClient(path="new_hadith_rag_source")

# Load the pre-built collection of embedded hadith documents.
collection = client.get_collection(name="hadiths_new_complete")

# Debugging print to verify the number of documents in the collection.
print(f"Number of documents in collection: {collection.count()}")
# --- Model and tokenizer loading ---
model_name = "google/flan-t5-base"
token = os.getenv("HUGGINGFACE_TOKEN")

# Use the `model_name` variable (the original hard-coded the string again
# here) so tokenizer and model are guaranteed to stay in sync if the
# checkpoint is ever changed.
tokenizer = AutoTokenizer.from_pretrained(model_name, token=token)
llm = AutoModelForSeq2SeqLM.from_pretrained(
    model_name,
    pad_token_id=tokenizer.eos_token_id,
    token=token,
    device_map="auto",  # let accelerate pick GPU/CPU placement
)

# Sentence-embedding model used for retrieval; moved to GPU when available.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
retrieval_model = SentenceTransformer('all-MiniLM-L6-v2').to(device)
def query_collection(query, n_results):
    """Embed *query* and return the n_results nearest documents from ChromaDB."""
    # Encode the query on the active device, then bring the vector back to
    # the CPU as a NumPy array, which is what ChromaDB expects.
    embedding = (
        retrieval_model
        .encode([query], convert_to_tensor=True, device=device)
        .cpu()
        .numpy()
    )
    return collection.query(query_embeddings=embedding, n_results=n_results)
def generate_response(context, question):
    """Generate an answer to *question* grounded in the retrieved *context*.

    Decoding is deterministic beam search (5 beams). The original passed
    `temperature=0.9`, which is ignored unless `do_sample=True` and only
    triggers a transformers warning, so it has been removed.
    """
    prompt = f"Please provide a short, well-structured answer and avoids repetition from context:\n{context}\n\nQuestion:\n{question}\n\nAnswer:"
    # truncation=True clips over-long prompts to the model's maximum input
    # length instead of failing when many/long documents are retrieved.
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True).to(device)
    outputs = llm.generate(
        **inputs,
        max_length=2048,
        num_return_sequences=1,
        num_beams=5,
        pad_token_id=tokenizer.eos_token_id,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
def chatbot_response(user_query, top_k=2):
    """Basic RAG pipeline: retrieve top_k documents, then generate an answer."""
    # Step 1: fetch the most relevant documents for the query.
    retrieved = query_collection(user_query, top_k)

    # Step 2: flatten the nested result lists and join them into one context.
    flattened = []
    for doc_list in retrieved['documents']:
        flattened.extend(doc_list)
    combined_context = "\n\n".join(flattened)

    # Step 3: answer the question against the combined context.
    return generate_response(combined_context, user_query)
# Global flag flipped by the "Stop Processing" button; the chatbot handler
# checks it after generation finishes to decide which message to return.
stop_processing = False
def chatbot(query, num_candidates):
    """Gradio handler: validate the query, run the RAG pipeline, format the answer."""
    global stop_processing
    # Reset the stop flag at the start of every request.
    stop_processing = False

    # Guard clause: an empty query gets a default prompt back.
    if not query.strip():
        return "Please ask a question about hadiths."

    # Retrieval + generation via the RAG pipeline.
    answer = chatbot_response(query, num_candidates)

    # The stop button may have been pressed while we were generating.
    if stop_processing:
        return "Processing was stopped by the user."

    # If the model admits ignorance, substitute a fixed apology.
    lowered = answer.lower()
    if "don't know" in lowered or "not sure" in lowered:
        return "Sorry. I don't have information about the hadiths related. It might be a dhoif, or maudhu, or I just don't have the knowledge."
    return answer
def stop():
    """Signal the in-flight query to stop; wired to the Stop Processing button."""
    global stop_processing
    stop_processing = True
    return "Processing stopped."
# --- Build the Gradio interface ---
with gr.Blocks() as demo:
    # Intro / disclaimer banner shown at the top of the app.
    gr.Markdown(
        """
        # Burhan AI
        Assalamualaikum! I am Burhan AI, a chatbot that can help you find answers to your questions about hadiths.
        \n
        Please note that this is a demo version and may not be perfect.
        This chatbot is powered by the ChromaDB and Flan-T5-base models with RAG architecture.
        Flan-T5-base is a small model and may not be as accurate as the bigger models.
        If you have any feedback or suggestions, you can contact me at frendyrachman7@gmail.com
        \n
        Jazakallah Khairan!
        """
    )
    with gr.Row():
        # Question box plus a slider controlling how many documents to retrieve.
        query_input = gr.Textbox(lines=2, placeholder="Enter your question here...")
        num_candidates_input = gr.Slider(minimum=1, maximum=10, value=2, step=1, label="Number of References")
    submit_button = gr.Button("Submit")
    output_text = gr.Textbox(label="Response")
    # Wire the submit button to the chatbot handler.
    submit_button.click(chatbot, inputs=[query_input, num_candidates_input], outputs=output_text)
    # Button that sets the global stop flag; its status textbox stays hidden.
    stop_button = gr.Button("Stop Processing")
    stop_output = gr.Textbox(visible=False)
    stop_button.click(stop, inputs=[], outputs=stop_output)

# Launch the Gradio interface.
demo.launch()