"""Gradio app that answers questions about an uploaded PDF.

The PDF is split into overlapping chunks, embedded with a Hugging Face
sentence-transformer model, indexed in an in-memory FAISS store, and queried
through a LangChain ``RetrievalQA`` chain backed by an OpenRouter-hosted LLM.
"""

import functools
import os

from dotenv import load_dotenv
import gradio as gr
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA

# Load environment variables (a local .env file is read if present).
load_dotenv()
# NOTE(review): api_key is None when OPENROUTER_API_KEY is unset; the client
# is still constructed with it below -- confirm how that should be handled.
api_key = os.getenv("OPENROUTER_API_KEY")

# Initialise the LLM; OpenRouter is addressed through the OpenAI-style client.
llm = ChatOpenAI(
    openai_api_base="https://openrouter.ai/api/v1",
    openai_api_key=api_key,
    model="deepseek/deepseek-r1-zero:free",
)


@functools.lru_cache(maxsize=1)
def _get_embeddings():
    """Return the embedding model, building it on first use only.

    The model does not depend on the request, so it is created once and
    reused instead of being re-instantiated for every question.
    """
    return HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")


def _resolve_pdf_path(pdf_file):
    """Return the filesystem path for the value supplied by ``gr.File``.

    Accepts either a plain path string or an object exposing the path via
    its ``name`` attribute, so both upload representations work.
    """
    return getattr(pdf_file, "name", pdf_file)


def processar_pdf(pdf_file, pergunta):
    """Answer ``pergunta`` using the content of the uploaded PDF.

    Args:
        pdf_file: Upload value from the ``gr.File`` input: a path string, an
            object with a ``name`` attribute holding the path, or ``None``
            when nothing was uploaded.
        pergunta: The user's question.

    Returns:
        A ``(answer, sources)`` tuple of strings. ``sources`` lists the first
        300 characters of each retrieved chunk. When the input is unusable
        (no file, blank question, no extractable text) the first element is
        an explanatory message and the second is an empty string.
    """
    # Guard clauses: fail with a readable message instead of an
    # AttributeError / IndexError deep inside the pipeline.
    if pdf_file is None:
        return "Por favor, envie um arquivo PDF.", ""
    if not pergunta or not pergunta.strip():
        return "Por favor, digite uma pergunta.", ""

    pdf_path = _resolve_pdf_path(pdf_file)

    # Load the PDF and split it into overlapping chunks.
    loader = PyPDFLoader(pdf_path)
    documents = loader.load()
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    docs = splitter.split_documents(documents)

    # An image-only or empty PDF yields no chunks, and an index cannot be
    # built from an empty list.
    if not docs:
        return "Não foi possível extrair texto do PDF enviado.", ""

    # Embeddings and vector index.
    vectorstore = FAISS.from_documents(docs, _get_embeddings())

    # Retrieval QA chain; source documents are requested so they can be shown.
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        retriever=vectorstore.as_retriever(),
        return_source_documents=True,
    )

    # Run the query and format the answer plus a preview of each source.
    resposta = qa_chain.invoke({"query": pergunta})
    result = resposta["result"]
    fontes = "\n\n".join(
        f"Fonte {i+1}: {doc.page_content[:300]}..."
        for i, doc in enumerate(resposta["source_documents"])
    )
    return result, fontes


# Gradio interface: one file input and one question box in, answer and
# sources out.
interface = gr.Interface(
    fn=processar_pdf,
    inputs=[
        gr.File(label="Envie um PDF"),
        gr.Textbox(label="Sua pergunta", placeholder="Ex: Qual a duração do curso?"),
    ],
    outputs=[
        gr.Textbox(label="Resposta"),
        gr.Textbox(label="Fontes utilizadas"),
    ],
    title="Chat com PDF (LangChain)",
    description="Carregue um PDF e faça perguntas sobre ele. Powered by LangChain + Hugging Face Embeddings",
)

if __name__ == "__main__":
    interface.launch()