# Mohit0708's picture
# Create app.py
# 37bbbb7 verified
import os
import tempfile

import gradio as gr
import nest_asyncio
import torch
import whisper
from gtts import gTTS
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_huggingface import HuggingFaceEndpoint
from langchain_text_splitters import RecursiveCharacterTextSplitter
nest_asyncio.apply()
# Securely fetching the token from Hugging Face Settings
hf_token = os.environ.get("HF_TOKEN")
print("Connecting to Hugging Face Cloud...")
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
# Using Flan-T5-large as it is highly stable on the free API tier
llm = HuggingFaceEndpoint(
repo_id="google/flan-t5-large",
task="text2text-generation",
max_new_tokens=256,
temperature=0.1,
huggingfacehub_api_token=hf_token
)
whisper_model = whisper.load_model("base")
vector_db = None
def process_pdf(pdf_file):
global vector_db
if pdf_file is None: return "⚠️ Please upload a file."
loader = PyPDFLoader(pdf_file.name)
pages = loader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
docs = text_splitter.split_documents(pages)
vector_db = FAISS.from_documents(docs, embeddings)
return "✅ Knowledge Base Ready!"
def technician_agent(audio_path):
if vector_db is None:
return "⚠️ Please upload a PDF first!", None
result = whisper_model.transcribe(audio_path)
user_text = result["text"]
relevant_docs = vector_db.similarity_search(user_text, k=3)
context_text = "\n\n".join([doc.page_content for doc in relevant_docs])
prompt = f"""Answer the question based on the context below. Keep it short and actionable.
Context: {context_text}
Question: {user_text}
"""
response = llm.invoke(prompt)
clean_response = response.strip()
tts = gTTS(clean_response)
output_audio = "response.mp3"
tts.save(output_audio)
return clean_response, output_audio
with gr.Blocks(theme=gr.themes.Soft()) as app:
gr.Markdown("# 🏭 AI Industrial Technician (Cloud API)")
with gr.Row():
pdf_input = gr.File(label="1. Upload Technical Manual (PDF)", file_types=[".pdf"])
upload_btn = gr.Button("Process Manual")
status_text = gr.Textbox(label="Status", interactive=False)
upload_btn.click(process_pdf, inputs=[pdf_input], outputs=[status_text])
with gr.Row():
audio_input = gr.Audio(sources=["microphone"], type="filepath", label="2. Ask Question (Voice)")
with gr.Row():
text_output = gr.Textbox(label="AI Answer (Text)")
audio_output = gr.Audio(label="AI Answer (Voice)", autoplay=True)
audio_input.change(technician_agent, inputs=[audio_input], outputs=[text_output, audio_output])
app.launch()