"""Gradio RAG chatbot over OCR'd PDF manuals stored in a Hugging Face dataset.

At import time the script:
  1. reads the Hugging Face token from the environment,
  2. downloads each PDF listed in ``pdf_files`` and extracts its text,
  3. splits the text into chunks and indexes them in a FAISS vector store,
  4. builds a ``RetrievalQA`` chain on top of a hosted LLM endpoint,
  5. defines a small Gradio UI that forwards questions to that chain.
"""
import os

import gradio as gr
import pdfplumber
from huggingface_hub import hf_hub_download
from langchain.chains import RetrievalQA
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFaceEndpoint

# Fail fast when the token is missing: both the dataset download and the LLM
# endpoint below are given this token.
token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
if not token:
    raise ValueError("❌ Hugging Face Token 누락 (Spaces Secrets에 HUGGINGFACEHUB_API_TOKEN 설정 필요)")

llm = HuggingFaceEndpoint(
    repo_id="meta-llama/Llama-3.2-3B-Instruct",
    huggingfacehub_api_token=token,
    temperature=0.7,
    task="text-generation",
)

# File names inside the "dgmos/ericsson-manuals" dataset repository.
pdf_files = [
    "(20220324) L2 Switch 운용 매뉴얼_Innovation TF_Ver3.1_OCR.pdf",
    "(20230504) 23년 기술교육 교재 1 (LTE)_가치혁신팀_OCR.pdf",
    "(20230531) 23년 기술교육 교재 2 (5G)_가치혁신팀_OCR.pdf",
    "차단기 종류 및 용도_OCR.pdf",
]


def _extract_pdf_text(pdf_path):
    """Return the text of every non-empty page of *pdf_path*, one page per line.

    Newlines inside a page are flattened to spaces, and pages are then joined
    with a real newline so page boundaries remain visible to the splitter.
    """
    page_texts = []
    with pdfplumber.open(pdf_path) as pdf:
        for page in pdf.pages:
            # Extract once per page; extraction is the expensive step.
            page_text = page.extract_text()
            if page_text:
                page_texts.append(page_text.replace("\n", " "))
    return "\n".join(page_texts)


# Best-effort loading: a failure on one manual is reported and skipped so the
# remaining manuals can still be indexed.
docs = []
for name in pdf_files:
    try:
        pdf_path = hf_hub_download(
            repo_id="dgmos/ericsson-manuals",
            filename=name,
            repo_type="dataset",
            token=token,
        )
        text = _extract_pdf_text(pdf_path)
    except Exception as e:
        print(f"🚨 오류: {name} - {e}")
        continue
    if text.strip():
        docs.append({"page_content": text, "metadata": {"source": name}})

if not docs:
    raise ValueError("❌ PDF 추출 실패 (docs 비어있음)")

splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)
# `docs` holds plain dicts, which `split_documents` cannot consume (it reads
# `.page_content` / `.metadata` attributes). `create_documents` accepts raw
# strings plus per-text metadata and returns proper Document chunks.
texts = splitter.create_documents(
    [doc["page_content"] for doc in docs],
    metadatas=[doc["metadata"] for doc in docs],
)

embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/multi-qa-mpnet-base-dot-v1"
)
vectorstore = FAISS.from_documents(texts, embeddings)
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=vectorstore.as_retriever(search_kwargs={"k": 3}),
    chain_type="stuff",
)


def chatbot(q):
    """Answer question *q* with the retrieval QA chain.

    Args:
        q: The user's question as entered in the UI text box.

    Returns:
        The chain's answer text, or a user-facing error string when the
        question is blank or the chain raises.
    """
    # Skip the remote LLM call entirely for blank input.
    if not q or not q.strip():
        return "❌ 질문을 입력해 주세요."
    try:
        # `Chain.run` is deprecated; RetrievalQA takes its input under
        # "query" and returns the answer under "result".
        return qa_chain.invoke({"query": q})["result"]
    except Exception as e:
        # UI boundary: surface the failure to the user instead of crashing.
        return f"❌ 오류: {str(e)}"


with gr.Blocks(title="Ericsson RAG 챗봇") as demo:
    gr.Markdown("# ⚙️ Ericsson LTE/5G 매뉴얼 챗봇")
    query = gr.Textbox(label="질문 입력", placeholder="예: LTE 스퓨리어스 원인은?")
    output = gr.Textbox(label="응답", lines=8)
    btn = gr.Button("분석 시작")
    # Event listeners must be registered inside the Blocks context.
    btn.click(chatbot, inputs=query, outputs=output)

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)