# Hugging Face Space: voice-driven RAG assistant over uploaded PDF manuals.
import gradio as gr
import whisper
import torch
import nest_asyncio
import os
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_huggingface import HuggingFaceEndpoint
from langchain_community.vectorstores import FAISS
from gtts import gTTS

# Allow re-entrant event loops (Gradio may start inside an already-running loop).
nest_asyncio.apply()

# Securely fetch the token from the Space's secrets / environment settings.
hf_token = os.environ.get("HF_TOKEN")
if not hf_token:
    # Warn at startup instead of failing later with an opaque 401 from the API.
    print("⚠️ HF_TOKEN is not set — Hugging Face Inference API calls will fail.")

print("Connecting to Hugging Face Cloud...")

# Local sentence-embedding model used to index and query the PDF chunks.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Using Flan-T5-large as it is highly stable on the free API tier.
llm = HuggingFaceEndpoint(
    repo_id="google/flan-t5-large",
    task="text2text-generation",
    max_new_tokens=256,
    temperature=0.1,
    huggingfacehub_api_token=hf_token,
)

# Local Whisper model for speech-to-text transcription of user questions.
whisper_model = whisper.load_model("base")

# FAISS index built by process_pdf(); None until a manual has been processed.
vector_db = None
def process_pdf(pdf_file):
    """Build the FAISS knowledge base from an uploaded PDF manual.

    Args:
        pdf_file: Value of the gr.File input — either an object with a
            ``.name`` filepath attribute or a plain filepath string,
            depending on the Gradio version.

    Returns:
        A status message string for display in the UI.
    """
    global vector_db
    if pdf_file is None:
        return "⚠️ Please upload a file."
    # Newer Gradio versions pass a str path; older ones a tempfile-like object.
    path = pdf_file if isinstance(pdf_file, str) else pdf_file.name
    try:
        loader = PyPDFLoader(path)
        pages = loader.load()
    except Exception as e:
        # Surface unreadable/corrupt PDFs to the UI instead of crashing the handler.
        return f"⚠️ Could not read PDF: {e}"
    # Overlapping chunks preserve context that straddles chunk boundaries.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    docs = text_splitter.split_documents(pages)
    vector_db = FAISS.from_documents(docs, embeddings)
    return "✅ Knowledge Base Ready!"
def technician_agent(audio_path):
    """Answer a spoken question from the indexed manual (RAG pipeline).

    Transcribes the recording with Whisper, retrieves the top-3 relevant
    chunks from the FAISS index, asks the LLM for a grounded answer, and
    synthesizes the reply to speech with gTTS.

    Args:
        audio_path: Filepath of the recorded audio, or None — the Audio
            component's .change event also fires when the recording is
            cleared, passing None.

    Returns:
        Tuple (answer_text, answer_audio_path); audio path is None on errors.
    """
    import tempfile  # local import: only needed for the TTS output file

    if vector_db is None:
        return "⚠️ Please upload a PDF first!", None
    # Guard the clear-event case; transcribe(None) would raise.
    if audio_path is None:
        return "⚠️ No audio received — please record a question.", None
    result = whisper_model.transcribe(audio_path)
    user_text = result["text"]
    # Ground the answer in the 3 most relevant manual chunks.
    relevant_docs = vector_db.similarity_search(user_text, k=3)
    context_text = "\n\n".join(doc.page_content for doc in relevant_docs)
    prompt = f"""Answer the question based on the context below. Keep it short and actionable.
Context: {context_text}
Question: {user_text}
"""
    response = llm.invoke(prompt)
    clean_response = response.strip()
    tts = gTTS(clean_response)
    # Unique temp file per request: a shared "response.mp3" races between
    # concurrent users / overlapping requests.
    fd, output_audio = tempfile.mkstemp(suffix=".mp3")
    os.close(fd)
    tts.save(output_audio)
    return clean_response, output_audio
# ---- Gradio UI -------------------------------------------------------------
with gr.Blocks(theme=gr.themes.Soft()) as app:
    gr.Markdown("# 🏭 AI Industrial Technician (Cloud API)")

    # Step 1: upload the technical manual and build the knowledge base.
    with gr.Row():
        pdf_input = gr.File(label="1. Upload Technical Manual (PDF)", file_types=[".pdf"])
        upload_btn = gr.Button("Process Manual")
    status_text = gr.Textbox(label="Status", interactive=False)
    upload_btn.click(process_pdf, inputs=[pdf_input], outputs=[status_text])

    # Step 2: record a voice question; answer is shown and spoken back.
    with gr.Row():
        audio_input = gr.Audio(sources=["microphone"], type="filepath", label="2. Ask Question (Voice)")
    with gr.Row():
        text_output = gr.Textbox(label="AI Answer (Text)")
        audio_output = gr.Audio(label="AI Answer (Voice)", autoplay=True)
    audio_input.change(technician_agent, inputs=[audio_input], outputs=[text_output, audio_output])

app.launch()