Spaces:
Build error
Build error
File size: 2,446 Bytes
f86cb9c b6f606c 1a381aa f86cb9c 9bb5aaa b6f606c f86cb9c 228bd0f b6f606c f86cb9c b6f606c 6d76ee3 2f674ad 6d76ee3 6527359 b6f606c ad0b19c b6f606c ad0b19c b6f606c ad0b19c b6f606c dbaf019 b6f606c f86cb9c 228bd0f ad0b19c 2f674ad ad0b19c b6f606c 6527359 b6f606c f86cb9c b6f606c f86cb9c 541d9fe ad0b19c b6f606c 228bd0f ad0b19c f86cb9c 9bb5aaa |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
import os, pdfplumber, gradio as gr
from huggingface_hub import hf_hub_download
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFaceEndpoint
from langchain.chains import RetrievalQA
# Read the Hugging Face API token from the environment; abort start-up if it is unset or empty.
token = os.environ.get("HUGGINGFACEHUB_API_TOKEN")
if not token:
    raise ValueError("โ Hugging Face Token ๋๋ฝ (Spaces Secrets์ HUGGINGFACEHUB_API_TOKEN ์ค์ ํ์)")

# Hosted text-generation endpoint used as the LLM for the QA chain.
llm = HuggingFaceEndpoint(
    task="text-generation",
    temperature=0.7,
    huggingfacehub_api_token=token,
    repo_id="meta-llama/Llama-3.2-3B-Instruct",
)
# File names of the PDF manuals to load; each entry is passed as `filename`
# to hf_hub_download() in the loading loop that follows.
# NOTE(review): the non-ASCII parts of these names look mis-encoded (mojibake)
# and three of the literals are split across two lines in this copy --
# presumably an extraction artifact. Restore the exact file names from the
# dataset repo before running; they must match the remote names byte-for-byte.
pdf_files = [
"(20220324) L2 Switch ์ด์ฉ ๋งค๋ด์ผ_Innovation TF_Ver3.1_OCR.pdf",
"(20230504) 23๋
๊ธฐ์ ๊ต์ก ๊ต์ฌ 1 (LTE)_๊ฐ์นํ์ ํ_OCR.pdf",
"(20230531) 23๋
๊ธฐ์ ๊ต์ก ๊ต์ฌ 2 (5G)_๊ฐ์นํ์ ํ_OCR.pdf",
"์ฐจ๋จ๊ธฐ ์ข
๋ฅ ๋ฐ ์ฉ๋_OCR.pdf"
]
# Download every manual from the Hub dataset and collect its text, one entry
# per PDF. Each entry is a dict with a single "page_content" key.
docs = []
for name in pdf_files:
    try:
        pdf_path = hf_hub_download(
            repo_id="dgmos/ericsson-manuals",
            filename=name,
            repo_type="dataset",
            token=token,
        )
        with pdfplumber.open(pdf_path) as pdf:
            # Extract each page exactly once (the call is expensive); pages for
            # which extract_text() returns a falsy value are skipped.
            extracted = (page.extract_text() for page in pdf.pages)
            text_pages = [page_text.replace("\n", " ") for page_text in extracted if page_text]
        # Join pages with a real newline. The previous "\\n" inserted a literal
        # backslash followed by "n" between pages.
        text = "\n".join(text_pages)
        if text.strip():
            docs.append({"page_content": text})
    except Exception as e:
        # Best-effort loading: report the failing file and continue with the rest.
        print(f"๐จ ์ค๋ฅ: {name} - {e}")

# Nothing could be extracted from any PDF: fail fast instead of building an empty index.
if not docs:
    raise ValueError("โ PDF ์ถ์ถ ์คํจ (docs ๋น์ด์์)")
# Chunk the extracted text, embed the chunks into a FAISS index, and build the
# retrieval QA chain on top of it.
splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)
# `docs` holds plain dicts, which split_documents() cannot consume: it reads
# the .page_content / .metadata attributes of Document objects and would raise
# AttributeError. create_documents() takes the raw strings and returns
# Document chunks.
texts = splitter.create_documents([doc["page_content"] for doc in docs])
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/multi-qa-mpnet-base-dot-v1")
vectorstore = FAISS.from_documents(texts, embeddings)
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    # Retrieve the 3 most similar chunks for each question.
    retriever=vectorstore.as_retriever(search_kwargs={"k": 3}),
    chain_type="stuff",
)
def chatbot(q: str) -> str:
    """Answer a question with the retrieval QA chain.

    Args:
        q: The question text to send to the chain.

    Returns:
        The chain's answer text, or an error-message string if the chain
        raised an exception.
    """
    try:
        # Chain.run() is deprecated; invoke() with the chain's "query" input
        # key returns a dict whose "result" entry holds the answer text.
        return qa_chain.invoke({"query": q})["result"]
    except Exception as e:
        # Return the failure as text so the caller gets a string either way.
        return f"โ ์ค๋ฅ: {str(e)}"
# Gradio UI: a heading, a question text box, an answer text box (8 lines), and
# a button whose click runs chatbot() on the question and shows the result.
# NOTE(review): in this copy the body of the `with` block has lost its
# indentation, the UI strings look mis-encoded (mojibake), and the `label=`
# literal of the question Textbox is split across two lines -- presumably
# extraction artifacts. Restore them from the original file before running.
with gr.Blocks(title="Ericsson RAG ์ฑ๋ด") as demo:
gr.Markdown("# โ๏ธ Ericsson LTE/5G ๋งค๋ด์ผ ์ฑ๋ด")
query = gr.Textbox(label="์ง๋ฌธ ์
๋ ฅ", placeholder="์: LTE ์คํจ๋ฆฌ์ด์ค ์์ธ์?")
output = gr.Textbox(label="์๋ต", lines=8)
btn = gr.Button("๋ถ์ ์์")
# Wire the button: question box in, answer box out.
btn.click(chatbot, inputs=query, outputs=output)
# When executed as a script, serve the UI on all interfaces at port 7860.
if __name__ == "__main__":
    launch_options = {"server_name": "0.0.0.0", "server_port": 7860}
    demo.launch(**launch_options)
|