import gradio as gr
import pandas as pd
import numpy as np
import faiss
import pypdf
import whisper
import os
from openai import OpenAI
from sentence_transformers import SentenceTransformer
from docx import Document
import csv

# SECURITY: the original hard-coded a live-looking API key in source.
# Read it from the environment instead (and rotate the leaked key).
client = OpenAI(
    api_key=os.environ.get("DEEPSEEK_API_KEY", ""),
    base_url="https://api.deepseek.com",
)

# Load Whisper model for speech-to-text
whisper_model = whisper.load_model("base")

# --- Retrieval state -------------------------------------------------------
# The original code called search_faiss() without ever defining it, which
# raised NameError on every query.  A minimal FAISS flat-L2 index over the
# current document's chunks is implemented below using the libraries the
# file already imports (faiss, numpy, sentence_transformers).
_embedder = None        # lazily-loaded SentenceTransformer
_faiss_index = None     # faiss.IndexFlatL2 over the current document's chunks
_indexed_chunks = []    # chunk i corresponds to index vector i


def _get_embedder():
    """Lazily load the sentence embedder (avoids slow import-time download)."""
    global _embedder
    if _embedder is None:
        _embedder = SentenceTransformer("all-MiniLM-L6-v2")
    return _embedder


def build_faiss_index(document_text):
    """(Re)build the FAISS index for *document_text*, chunked by split_text()."""
    global _faiss_index, _indexed_chunks
    _indexed_chunks = split_text(document_text)
    if not _indexed_chunks:
        _faiss_index = None
        return
    vectors = np.asarray(
        _get_embedder().encode(_indexed_chunks), dtype="float32"
    )
    _faiss_index = faiss.IndexFlatL2(vectors.shape[1])
    _faiss_index.add(vectors)


def search_faiss(query, top_k=5):
    """Return the top_k most similar document chunks for *query*.

    Returns an empty string when no document has been indexed yet, which
    tool_use() treats as "no answer found".
    """
    if _faiss_index is None or not _indexed_chunks:
        return ""
    query_vec = np.asarray(_get_embedder().encode([query]), dtype="float32")
    k = min(top_k, len(_indexed_chunks))
    _, indices = _faiss_index.search(query_vec, k)
    return "\n\n".join(
        _indexed_chunks[i] for i in indices[0] if i >= 0
    )


# Function to extract text from a PDF
def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page in the PDF at *pdf_path*."""
    reader = pypdf.PdfReader(pdf_path)
    text = ""
    for page in reader.pages:
        # extract_text() returns None for image-only pages; guard against
        # "None + str" TypeError that the original code raised.
        text += (page.extract_text() or "") + "\n"
    return text.strip()


# Function to extract text from a Word document
def extract_text_from_word(doc_path):
    """Return all paragraph text of the Word document at *doc_path*."""
    doc = Document(doc_path)
    return "\n".join(para.text for para in doc.paragraphs).strip()


# Function to extract text from a CSV file
def extract_text_from_csv(csv_path):
    """Return the CSV at *csv_path* as ' | '-joined rows, one row per line."""
    lines = []
    with open(csv_path, newline="", encoding="utf-8") as csvfile:
        for row in csv.reader(csvfile):
            lines.append(" | ".join(row))
    return "\n".join(lines).strip()


# Function to process uploaded document
def process_uploaded_file(file_path):
    """Dispatch on file extension and return the extracted document text.

    Returns an error string (not an exception) for unsupported formats.
    """
    ext = os.path.splitext(file_path)[1].lower()
    if ext == ".pdf":
        return extract_text_from_pdf(file_path)
    if ext in (".doc", ".docx"):
        return extract_text_from_word(file_path)
    if ext == ".csv":
        return extract_text_from_csv(file_path)
    return "Unsupported file format. Please upload a PDF, Word, or CSV file."


# Function to split text into chunks
def split_text(text, chunk_size=2000):
    """Split *text* into fixed-size character chunks of *chunk_size*."""
    return [text[i : i + chunk_size] for i in range(0, len(text), chunk_size)]


# Function to convert speech to text
def process_audio(audio_filepath):
    """Convert speech to text using Whisper ASR."""
    if not audio_filepath:
        return "⚠️ ERROR: No audio detected. Please try speaking again."
    try:
        result = whisper_model.transcribe(audio_filepath)
        return result["text"] if result["text"] else "⚠️ ERROR: Speech not clear. Try again."
    except Exception as e:
        return f"⚠️ ERROR: Whisper failed - {str(e)}"


# Agentic AI Capabilities

# 1️⃣ Chain-of-Thought Reasoning
def chain_of_thought_reasoning(query):
    """Break down queries into logical steps."""
    steps = [
        "Step 1: Identify the relevant sections from the document.",
        "Step 2: Retrieve the most relevant parts of the document.",
        "Step 3: Summarize key points and provide a detailed explanation.",
        "Step 4: Generate a user-friendly response.",
    ]
    return "\n".join(steps)


# 2️⃣ Multi-Hop Question Answering
def multi_hop_answering(query, document_text):
    """Retrieve multiple relevant sections for a more complete answer."""
    relevant_sections = search_faiss(query, top_k=10)
    reasoning = chain_of_thought_reasoning(query)
    answer = chat_with_document(query, document_text, min_words=100, max_words=500)
    return (
        f"Reasoning:\n{reasoning}\n\n"
        f"Extracted Information:\n{relevant_sections}\n\n"
        f"Final Answer:\n{answer}"
    )


# 3️⃣ External Tool Use (Fallback for Missing Answers)
def tool_use(query):
    """If FAISS doesn't find an answer, ask an external database."""
    result = search_faiss(query)
    if not result:
        return "I could not find an answer in the document. Would you like me to search external sources?"
    return result


# Function to generate responses using DeepSeek
def chat_with_document(query, document_text, min_words=50, max_words=500):
    """Retrieve relevant text and ask DeepSeek for an answer."""
    # FAISS-retrieved context (falls back to a "not found" message);
    # included in the prompt so retrieval actually influences the answer —
    # the original computed this and then discarded it.
    relevant_context = tool_use(query)
    control_prompt = f"Generate an answer between {min_words}-{max_words} words."
    response = client.chat.completions.create(
        model="deepseek-reasoner",
        messages=[
            {
                "role": "system",
                "content": "You are an expert document analyst providing insights based on the uploaded document.",
            },
            {
                "role": "user",
                "content": (
                    f"{control_prompt}\n\n"
                    f"Relevant Context:\n{relevant_context}\n\n"
                    f"Document: {document_text}\n\n"
                    f"User Question: {query}"
                ),
            },
        ],
        stream=False,
    )
    return response.choices[0].message.content


# Gradio Interface
with gr.Blocks() as demo:
    gr.Markdown("# 📄 Document Analyzer (Agentic AI)")
    with gr.Row():
        file_input = gr.File(label="Upload Document (PDF, Word, CSV)")
        voice_input = gr.Audio(label="Speak your Question", type="filepath")
    query_input = gr.Textbox(label="Ask a Question", elem_classes=["big-textbox"])
    submit_button = gr.Button("Analyze", elem_classes=["big-button"])
    response_output = gr.Textbox(
        label="Answer from Document", interactive=False, elem_classes=["big-response"]
    )

    def analyze_document(file, query):
        """Extract the uploaded document's text, index it, and answer *query*."""
        if not file:
            return "⚠️ ERROR: No document uploaded. Please upload a file."
        if not query or not query.strip():
            return "⚠️ ERROR: No question provided. Please type or speak a question."
        # gradio may hand back a tempfile-like object (with .name) or a path.
        path = file.name if hasattr(file, "name") else file
        document_text = process_uploaded_file(path)
        # Build the retrieval index before answering so search_faiss works.
        build_faiss_index(document_text)
        return multi_hop_answering(query, document_text)

    voice_input.change(process_audio, inputs=[voice_input], outputs=[query_input])
    submit_button.click(
        analyze_document, inputs=[file_input, query_input], outputs=[response_output]
    )

# Guard launch so importing this module (e.g. for testing) has no side effect.
if __name__ == "__main__":
    demo.launch()