# Mohit0708's picture
# Create app.py
# 37bbbb7 verified
import os
import tempfile

import gradio as gr
import nest_asyncio
import torch
import whisper
from gtts import gTTS
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_huggingface import HuggingFaceEndpoint
from langchain_text_splitters import RecursiveCharacterTextSplitter
nest_asyncio.apply()
# Securely fetching the token from Hugging Face Settings
hf_token = os.environ.get("HF_TOKEN")
print("Connecting to Hugging Face Cloud...")
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
# Using Flan-T5-large as it is highly stable on the free API tier
llm = HuggingFaceEndpoint(
repo_id="google/flan-t5-large",
task="text2text-generation",
max_new_tokens=256,
temperature=0.1,
huggingfacehub_api_token=hf_token
)
whisper_model = whisper.load_model("base")
vector_db = None
def process_pdf(pdf_file):
global vector_db
if pdf_file is None: return "⚠️ Please upload a file."
loader = PyPDFLoader(pdf_file.name)
pages = loader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
docs = text_splitter.split_documents(pages)
vector_db = FAISS.from_documents(docs, embeddings)
return "✅ Knowledge Base Ready!"
def technician_agent(audio_path):
if vector_db is None:
return "⚠️ Please upload a PDF first!", None
result = whisper_model.transcribe(audio_path)
user_text = result["text"]
relevant_docs = vector_db.similarity_search(user_text, k=3)
context_text = "\n\n".join([doc.page_content for doc in relevant_docs])
prompt = f"""Answer the question based on the context below. Keep it short and actionable.
Context: {context_text}
Question: {user_text}
"""
response = llm.invoke(prompt)
clean_response = response.strip()
tts = gTTS(clean_response)
output_audio = "response.mp3"
tts.save(output_audio)
return clean_response, output_audio
with gr.Blocks(theme=gr.themes.Soft()) as app:
gr.Markdown("# 🏭 AI Industrial Technician (Cloud API)")
with gr.Row():
pdf_input = gr.File(label="1. Upload Technical Manual (PDF)", file_types=[".pdf"])
upload_btn = gr.Button("Process Manual")
status_text = gr.Textbox(label="Status", interactive=False)
upload_btn.click(process_pdf, inputs=[pdf_input], outputs=[status_text])
with gr.Row():
audio_input = gr.Audio(sources=["microphone"], type="filepath", label="2. Ask Question (Voice)")
with gr.Row():
text_output = gr.Textbox(label="AI Answer (Text)")
audio_output = gr.Audio(label="AI Answer (Voice)", autoplay=True)
audio_input.change(technician_agent, inputs=[audio_input], outputs=[text_output, audio_output])
app.launch()