# dgmos's picture
# Deploy via Colab Secrets
# b6f606c
import os, pdfplumber, gradio as gr
from huggingface_hub import hf_hub_download
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFaceEndpoint
from langchain.chains import RetrievalQA
# --- Hugging Face credentials and LLM endpoint -------------------------------
# The API token is read from the environment; without it nothing below can
# reach the Hub, so fail immediately with an explicit error.
token = os.environ.get("HUGGINGFACEHUB_API_TOKEN")
if not token:
    raise ValueError("โŒ Hugging Face Token ๋ˆ„๋ฝ (Spaces Secrets์— HUGGINGFACEHUB_API_TOKEN ์„ค์ • ํ•„์š”)")

# Remote text-generation endpoint used by the QA chain to write the answers.
llm = HuggingFaceEndpoint(
    repo_id="meta-llama/Llama-3.2-3B-Instruct",
    task="text-generation",
    temperature=0.7,
    huggingfacehub_api_token=token,
)
# File names of the PDF manuals to index. Each entry is passed verbatim as
# the `filename` argument when the file is downloaded, so it must match the
# stored name exactly.
# NOTE(review): the non-ASCII parts of these names look mis-encoded (UTF-8
# bytes decoded with a single-byte code page) -- confirm the file's encoding
# before relying on them.
pdf_files = [
    "(20220324) L2 Switch ์šด์šฉ ๋งค๋‰ด์–ผ_Innovation TF_Ver3.1_OCR.pdf",
    "(20230504) 23๋…„ ๊ธฐ์ˆ ๊ต์œก ๊ต์žฌ 1 (LTE)_๊ฐ€์น˜ํ˜์‹ ํŒ€_OCR.pdf",
    "(20230531) 23๋…„ ๊ธฐ์ˆ ๊ต์œก ๊ต์žฌ 2 (5G)_๊ฐ€์น˜ํ˜์‹ ํŒ€_OCR.pdf",
    "์ฐจ๋‹จ๊ธฐ ์ข…๋ฅ˜ ๋ฐ ์šฉ๋„_OCR.pdf",
]
# --- Document ingestion and vector index -------------------------------------
# Download every manual listed in `pdf_files` from the Hub dataset repo,
# extract its text with pdfplumber, split the text into overlapping chunks
# and index the chunks in a FAISS vector store.
docs = []
for name in pdf_files:
    try:
        pdf_path = hf_hub_download(
            repo_id="dgmos/ericsson-manuals",
            filename=name,
            repo_type="dataset",
            token=token,
        )
        with pdfplumber.open(pdf_path) as pdf:
            # Extract each page exactly once (the call is repeated work
            # otherwise) and drop pages that yield no text.
            extracted = (page.extract_text() for page in pdf.pages)
            # Line breaks inside a page are flattened to spaces.
            text_pages = [page_text.replace("\n", " ") for page_text in extracted if page_text]
        # Join pages with a real newline. The previous "\\n" separator put a
        # literal backslash-n into the text instead of a line break.
        text = "\n".join(text_pages)
        if text.strip():
            docs.append({"page_content": text, "metadata": {"source": name}})
    except Exception as e:
        # Best effort: one unreadable or missing file must not abort the
        # whole start-up; report it and continue with the remaining files.
        print(f"๐Ÿšจ ์˜ค๋ฅ˜: {name} - {e}")

# Nothing could be extracted at all -> there is nothing to index.
if not docs:
    raise ValueError("โŒ PDF ์ถ”์ถœ ์‹คํŒจ (docs ๋น„์–ด์žˆ์Œ)")

splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)
# `docs` holds plain dicts, not Document objects, so split_documents() cannot
# be used on it. create_documents() accepts the raw strings plus one metadata
# dict per string and returns the chunked Document objects.
texts = splitter.create_documents(
    [doc["page_content"] for doc in docs],
    metadatas=[doc["metadata"] for doc in docs],
)
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/multi-qa-mpnet-base-dot-v1")
vectorstore = FAISS.from_documents(texts, embeddings)
# Retrieval-augmented QA: fetch the 3 best-matching chunks from the vector
# store and hand them to the LLM in a single prompt ("stuff" chain type).
retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
)
def chatbot(q: str) -> str:
    """Answer a question with the retrieval-augmented QA chain.

    Args:
        q: Question text entered in the UI.

    Returns:
        The answer produced by ``qa_chain``. If anything fails while
        answering, a string containing the error message is returned
        instead, so this function never raises into the UI callback.
    """
    try:
        # Chain.run() is deprecated in LangChain; invoke() is the supported
        # entry point. RetrievalQA takes the question under "query" and
        # returns its answer under "result".
        return qa_chain.invoke({"query": q})["result"]
    except Exception as e:
        # UI boundary: show the failure as text rather than crashing.
        return f"โŒ ์˜ค๋ฅ˜: {str(e)}"
# --- Gradio UI ---------------------------------------------------------------
# One question box, one answer box and a button that runs `chatbot`.
# NOTE(review): the labels below look mis-encoded (UTF-8 bytes decoded with a
# single-byte code page) -- confirm the file's encoding.
with gr.Blocks(title="Ericsson RAG ์ฑ—๋ด‡") as demo:
    gr.Markdown(value="# โš™๏ธ Ericsson LTE/5G ๋งค๋‰ด์–ผ ์ฑ—๋ด‡")
    query = gr.Textbox(
        placeholder="์˜ˆ: LTE ์Šคํ“จ๋ฆฌ์–ด์Šค ์›์ธ์€?",
        label="์งˆ๋ฌธ ์ž…๋ ฅ",
    )
    output = gr.Textbox(lines=8, label="์‘๋‹ต")
    btn = gr.Button(value="๋ถ„์„ ์‹œ์ž‘")
    # Clicking the button sends the question text to chatbot() and writes
    # the returned string into the answer box.
    btn.click(fn=chatbot, inputs=query, outputs=output)

if __name__ == "__main__":
    # Listen on all network interfaces, port 7860 (presumably for a
    # container deployment such as a Hugging Face Space -- confirm).
    demo.launch(server_port=7860, server_name="0.0.0.0")