File size: 2,446 Bytes
f86cb9c
b6f606c
1a381aa
f86cb9c
9bb5aaa
b6f606c
f86cb9c
 
228bd0f
b6f606c
 
f86cb9c
b6f606c
6d76ee3
2f674ad
 
 
 
 
 
6d76ee3
6527359
b6f606c
ad0b19c
b6f606c
ad0b19c
b6f606c
 
 
 
ad0b19c
b6f606c
dbaf019
b6f606c
 
f86cb9c
228bd0f
ad0b19c
2f674ad
ad0b19c
b6f606c
6527359
b6f606c
f86cb9c
b6f606c
f86cb9c
541d9fe
ad0b19c
b6f606c
 
 
228bd0f
 
ad0b19c
f86cb9c
 
9bb5aaa
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58

import os

import gradio as gr
import pdfplumber
from huggingface_hub import hf_hub_download
from langchain.chains import RetrievalQA
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFaceEndpoint

# The Hugging Face API token must be supplied via the Space's secret settings;
# fail fast at startup with an actionable message when it is absent.
token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
if not token:
    raise ValueError(
        "❌ Hugging Face Token 누락 (Spaces Secrets에 HUGGINGFACEHUB_API_TOKEN 설정 필요)"
    )

llm = HuggingFaceEndpoint(repo_id="meta-llama/Llama-3.2-3B-Instruct", huggingfacehub_api_token=token, temperature=0.7, task="text-generation")

pdf_files = [
    "(20220324) L2 Switch ์šด์šฉ ๋งค๋‰ด์–ผ_Innovation TF_Ver3.1_OCR.pdf",
    "(20230504) 23๋…„ ๊ธฐ์ˆ ๊ต์œก ๊ต์žฌ 1 (LTE)_๊ฐ€์น˜ํ˜์‹ ํŒ€_OCR.pdf",
    "(20230531) 23๋…„ ๊ธฐ์ˆ ๊ต์œก ๊ต์žฌ 2 (5G)_๊ฐ€์น˜ํ˜์‹ ํŒ€_OCR.pdf",
    "์ฐจ๋‹จ๊ธฐ ์ข…๋ฅ˜ ๋ฐ ์šฉ๋„_OCR.pdf"
]

# Download each manual, extract its text, and collect one Document per PDF.
docs = []
for name in pdf_files:
    try:
        # Fetch the PDF from the dataset repo (hf_hub caches it locally).
        pdf_path = hf_hub_download(
            repo_id="dgmos/ericsson-manuals",
            filename=name,
            repo_type="dataset",
            token=token,
        )
        with pdfplumber.open(pdf_path) as pdf:
            # extract_text() returns None for image-only pages; call it once
            # per page (the original called it twice) and skip empty pages.
            text_pages = [
                page_text.replace("\n", " ")
                for p in pdf.pages
                if (page_text := p.extract_text())
            ]
        # BUG FIX: the original joined with the two-character literal "\\n"
        # (backslash + n); join pages with a real newline instead.
        text = "\n".join(text_pages)
        if text.strip():
            # BUG FIX: split_documents() accesses .page_content/.metadata as
            # attributes, so it needs Document objects, not plain dicts.
            docs.append(Document(page_content=text, metadata={"source": name}))
    except Exception as e:
        print(f"🚨 오류: {name} - {e}")

if not docs:
    raise ValueError("❌ PDF 추출 실패 (docs 비어있음)")

# Chunk the manuals, embed each chunk, and assemble the retrieval QA chain.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)
chunks = text_splitter.split_documents(docs)
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/multi-qa-mpnet-base-dot-v1"
)
index = FAISS.from_documents(chunks, embedding_model)
# Retrieve the top-3 most similar chunks and "stuff" them into the prompt.
retriever = index.as_retriever(search_kwargs={"k": 3})
qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever, chain_type="stuff")

def chatbot(q):
    """Answer a user question via the RAG chain.

    Returns the model's answer string, or a formatted error message when the
    chain raises (network failure, rate limit, malformed response, ...).
    """
    try:
        # `Chain.run` is deprecated in LangChain; `invoke` is the supported
        # API. RetrievalQA takes its input under the "query" key and returns
        # the answer under "result".
        result = qa_chain.invoke({"query": q})
        return result["result"]
    except Exception as e:
        return f"❌ 오류: {str(e)}"

# Gradio front-end: a single question box wired to the QA chain.
with gr.Blocks(title="Ericsson RAG 챗봇") as demo:
    gr.Markdown("# ⚙️ Ericsson LTE/5G 매뉴얼 챗봇")
    question_box = gr.Textbox(label="질문 입력", placeholder="예: LTE 스퓨리어스 원인은?")
    answer_box = gr.Textbox(label="응답", lines=8)
    run_button = gr.Button("분석 시작")
    run_button.click(chatbot, inputs=question_box, outputs=answer_box)

# Launch the UI when run directly; 0.0.0.0:7860 is the standard binding
# Hugging Face Spaces expects.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)