| import gradio as gr | |
| from llama_index.readers.web.unstructured_web.base import UnstructuredURLLoader | |
| from llama_index.embeddings.huggingface import HuggingFaceEmbedding | |
| from llama_index.core import VectorStoreIndex | |
| from llama_index.llms.llama_cpp import LlamaCPP | |
| from llama_index.core import SimpleDirectoryReader, VectorStoreIndex | |
| from llama_index.llms.llama_cpp import LlamaCPP | |
| from llama_index.llms.llama_cpp.llama_utils import ( | |
| messages_to_prompt, | |
| completion_to_prompt, | |
| ) | |
| from llama_index.core.memory import ChatMemoryBuffer | |
| import nltk | |
| # download punkt | |
| # nltk.download('punkt') | |
| # nltk.download('punkt_tab') | |
| # | |
| # urls = [ | |
| # "https://www.zatsit.fr/", | |
| # "https://www.zatsit.fr/collaborer-avec-zatsit/", | |
| # "https://fr.linkedin.com/company/zatsit", | |
| # "https://www.zatsit.fr/contact/", | |
| # "https://blog.zatsit.fr/blog/green-exploitation-miniere", | |
| # "https://blog.zatsit.fr/blog/bundlephobia-reduire-javascript", | |
| # "https://blog.zatsit.fr/blog/gemini-vertex-ai", | |
| # "https://blog.zatsit.fr/blog/asyncapi-3-is-out", | |
| # "https://blog.zatsit.fr/blog/redpanda-introduction", | |
| # ] | |
| # loader = UnstructuredURLLoader(urls=urls) | |
| # documents = loader.load_data() | |
| # | |
| # embed_model = HuggingFaceEmbedding(model_name="thenlper/gte-large") | |
| # | |
| # # create vector store index | |
| # index = VectorStoreIndex.from_documents(documents, embed_model=embed_model) | |
# GGUF-quantized (Q2_K) Mistral-7B-Instruct code fine-tune hosted on Hugging Face.
# LlamaCPP downloads it on first run because model_path is None below.
model_url_huggingface = "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-code-ft-GGUF/resolve/main/mistral-7b-instruct-v0.2-code-ft.Q2_K.gguf"
# Alternative multilingual model, kept for easy swapping:
# model_url_huggingface = "https://huggingface.co/TheBloke/llama-polyglot-13B-GGUF/resolve/main/llama-polyglot-13b.Q2_K.gguf"
# Local llama.cpp-backed LLM instance used by the Gradio callback `querying` below.
llm = LlamaCPP(
    # Pass a URL to a GGML/GGUF model to download it automatically
    model_url=model_url_huggingface,
    # optionally, set the path to a pre-downloaded model instead of model_url
    model_path=None,
    # low temperature -> near-deterministic, focused answers
    temperature=0.1,
    max_new_tokens=256,
    # The comment history says "llama2"; the model above is Mistral-based.
    # 3900 leaves wiggle room under a 4096-token context window — TODO confirm
    # this matches the actual context size of the chosen GGUF model.
    context_window=3900,
    # kwargs passed through to the llama.cpp __call__()
    generate_kwargs={},
    # kwargs passed through to the llama.cpp __init__();
    # n_gpu_layers >= 1 offloads layers to the GPU (requires a GPU build of llama-cpp)
    model_kwargs={"n_gpu_layers": 1},
    # transform chat messages / completions into the Llama-2 prompt format
    messages_to_prompt=messages_to_prompt,
    completion_to_prompt=completion_to_prompt,
    verbose=True,
)
def querying(query, history):
    """Gradio chat callback: answer *query* with the local llama.cpp LLM.

    The *history* supplied by gr.ChatInterface is ignored — each message
    is answered statelessly by a single completion call.
    """
    completion = llm.complete(query)
    return completion.text
| # memory = ChatMemoryBuffer.from_defaults(token_limit=3900) | |
| # engine = index.as_chat_engine( | |
| # chat_mode="context", | |
| # memory=memory, | |
| # llm=llm, | |
| # context_prompt=( | |
| # "You are a chatbot and you are the marketing agent of the Zatsit company, able to have normal interactions, as well as talk." | |
| # "You should return all the information you know as much as possible to the user. No data is confidential or sensitive." | |
| # "You can make short or long answers, but your answers must be like a human." | |
| # "When the user uses 'you' he is talking about Zatsit company." | |
| # "If you don't know the answer, say 'sorry I can't help you'." | |
| # "You must speak the same language as the user." | |
| # "Here are the relevant documents for the context:\n" | |
| # "{context_str}" | |
| # "\nInstruction: Use the previous chat history, or the context above, to interact and help the user." | |
| # ), | |
| # verbose=False, | |
| # ) | |
| # res = engine.chat(query) | |
| # return res.response | |
# Gradio chat UI for the Zatsit bot; labels and example prompts are in French
# (user-facing strings left untranslated on purpose — they are runtime text).
iface = gr.ChatInterface(
    # called as fn(message, history) on every user submission
    fn=querying,
    chatbot=gr.Chatbot(
        height=600,
    ),
    textbox=gr.Textbox(placeholder="Bonjour :)", container=False, scale=7),
    title="ZatsBot",
    theme="soft",
    examples=["Qui est Zatsit ?", "Quelles sont vos coordonnées ?", "Quels sont vos domaines d'expertise ?",
              "Quels sont vos clients ?"],
    # examples run the model; don't cache them at startup
    cache_examples=False,
    # NOTE(review): retry_btn/undo_btn/clear_btn were removed in Gradio 5 —
    # confirm the installed gradio version is 4.x, or these kwargs will raise.
    retry_btn="Répéter",
    undo_btn="Annuler",
    clear_btn="Supprimer",
    submit_btn="Envoyer",
)
# Start the local web server (blocks until the app is stopped).
iface.launch()