# app.py — Notebook CI/CD chatbot: a Gradio RAG app answering questions
# about the unified Notebook CI/CD system from scraped GitHub docs.
import gradio as gr
import requests
import os
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_chroma import Chroma
from langchain_core.prompts import PromptTemplate
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
# Scrape the docs from GitHub, rendered to plain text via the Jina Reader proxy.
def load_doc():
    """Fetch every doc page listed in ./new_docs.txt and return their text.

    Each line of new_docs.txt is a path relative to the repo's docs/ folder.
    Every page is fetched through the Jina Reader proxy (r.jina.ai), which
    returns an LLM-friendly plain-text rendering of the page.

    Returns:
        list[str]: the response body text of each requested page.
    """
    base_path = "https://github.com/spacetelescope/notebook-ci-actions/tree/dev-actions-v2-pipeline/docs/"
    # Context manager guarantees the file handle is closed (the original
    # left it open). strip() removes trailing newlines; skipping blank
    # lines avoids a pointless request to the bare base URL.
    with open("./new_docs.txt") as urlsfile:
        urls = [base_path + line.strip() for line in urlsfile if line.strip()]
    pages = []
    for url in urls:
        # A timeout keeps a stuck request from hanging app startup forever.
        pages.append(requests.get("https://r.jina.ai/" + url, timeout=30).text)
    return pages
# Embedding model used to vectorize both the indexed docs and incoming queries.
embeddings = HuggingFaceEmbeddings(model_name="mixedbread-ai/mxbai-embed-large-v1")
# Chroma vector store persisted to the working directory; it doubles as the
# retriever feeding {context} into the RAG chain below.
vectorstore = Chroma(
    collection_name="notebook_docs",
    embedding_function=embeddings,
    persist_directory="./",
)
# Scrape and index the docs once at startup (network-bound; see load_doc).
vectorstore.add_texts(load_doc())
retriever = vectorstore.as_retriever()
# RAG prompt: the retrieved docs are injected as {context}, the user's
# question as {question}. (A superseded draft of this prompt was removed.)
template = ("""
You are a GitHub Actions and Jupyter Notebook expert.
Your task is to answer the question **using only the information provided in the context**.
If the context does not contain enough information, clearly state that and, if appropriate,
briefly outline what additional details would be needed to give a complete answer.
Guidelines:
- Focus only on the question. Do not mention the context or that you’re using retrieved text.
- Provide step-by-step, technically accurate explanations and examples where relevant.
- Avoid speculation, guesses, or outdated practices.
- Prefer modern, secure, and well-supported methods.
- Keep the answer concise but complete.
Context:
{context}
Question:
{question}
Answer:
""")
rag_prompt = PromptTemplate.from_template(template)
# Define the LLM. The API key is read from the GOOGLE_API_KEY environment
# variable; low temperature keeps answers close to the retrieved docs.
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash",temperature=0.1, google_api_key=os.environ.get('GOOGLE_API_KEY'))
# LCEL pipeline: the input question is fanned out — sent to the retriever
# (filling {context}) and passed through unchanged (filling {question}) —
# then formatted by the prompt, answered by the LLM, and parsed to a str.
rag_chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | rag_prompt
    | llm
    | StrOutputParser()
)
def rag_memory_stream(message, history):
    """Stream the RAG chain's answer, yielding the growing partial text.

    Gradio's ChatInterface renders each yielded value, so every chunk from
    the chain is appended and the full text-so-far is re-emitted.  The
    `history` argument is supplied by Gradio but unused here.
    """
    chunks = []
    for chunk in rag_chain.stream(message):
        chunks.append(chunk)
        yield "".join(chunks)
# Canned example questions shown as clickable suggestions in the UI.
examples = ['What is the Unified Notebook CI/CD System?', 'How to migrate the old CI to the new CI?']
description = "Real-time AI App to Answer questions about the unified Notebook CI/CD system"
title = "Notebook CI/CD chatbot"
# Chat UI wired to the streaming RAG generator; type="messages" selects the
# openai-style message history format.
demo = gr.ChatInterface(fn=rag_memory_stream,
                        type="messages",
                        title=title,
                        description=description,
                        fill_height=True,
                        examples=examples,
                        theme="glass",
                        )
# Launch the Gradio server only when run as a script (not on import).
if __name__ == "__main__":
    demo.launch()