Your Name committed on
Commit
f86cb9c
·
1 Parent(s): 155efea

Deploy Ericsson LLM chatbot with RAG

Browse files
Files changed (2) hide show
  1. app.py +63 -0
  2. requirements.txt +14 -0
app.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from langchain.document_loaders import UnstructuredPDFLoader, CSVLoader
3
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
4
+ from langchain.embeddings import HuggingFaceEmbeddings
5
+ from langchain.vectorstores import FAISS
6
+ from langchain.chains import RetrievalQA
7
+ from langchain.llms import HuggingFaceHub
8
+ import gradio as gr
9
+ from PIL import Image
10
+ import pytesseract
11
+
12
# Hub repository of the chat model and the LLM wrapper that every RAG chain
# in this module shares.
# NOTE(review): this repo is a vision-instruct model; confirm the
# HuggingFaceHub wrapper accepts its task type before deploying.
repo_id = "meta-llama/Llama-3.2-11B-Vision-Instruct"
llm = HuggingFaceHub(
    repo_id=repo_id,
    model_kwargs=dict(temperature=0.7),
)
14
+
15
def load_documents(files):
    """Build a FAISS vector store from the uploaded files.

    Supported inputs are PDF, PPTX and CSV files plus JPG/JPEG/PNG images
    (images are OCR'd with Tesseract using Korean + English). Files with any
    other extension are skipped.

    Args:
        files: Iterable of uploaded files. Each entry may be a path string or
            an object exposing the path as ``.name`` (which form Gradio
            passes depends on its version - TODO confirm for the deployed
            version).

    Returns:
        A FAISS vector store indexing 500-character chunks (50 overlap) of
        all loaded documents.

    Raises:
        ValueError: If no supported file yielded any content, instead of
            letting the vector store fail on an empty document list.
    """
    docs = []
    for entry in files or []:
        # Normalise the upload to a plain path string.
        file_path = entry if isinstance(entry, str) else getattr(entry, "name", None)
        if not isinstance(file_path, str):
            continue

        # Compare extensions case-insensitively so "REPORT.PDF" is accepted.
        lowered = file_path.lower()
        if lowered.endswith(".pdf"):
            docs.extend(UnstructuredPDFLoader(file_path).load())
        elif lowered.endswith(".pptx"):
            # PowerPoint needs its own loader; the PDF loader cannot parse it.
            from langchain.document_loaders import UnstructuredPowerPointLoader

            docs.extend(UnstructuredPowerPointLoader(file_path).load())
        elif lowered.endswith(".csv"):
            docs.extend(CSVLoader(file_path).load())
        elif lowered.endswith((".jpg", ".jpeg", ".png")):
            from langchain.schema import Document

            # Close the image handle as soon as OCR is done.
            with Image.open(file_path) as img:
                text = pytesseract.image_to_string(img, lang="kor+eng")
            # The splitter works on Document objects, not raw strings, so
            # wrap the OCR output; skip images with no recognised text.
            if text.strip():
                docs.append(
                    Document(page_content=text, metadata={"source": file_path})
                )

    if not docs:
        raise ValueError(
            "No supported, non-empty documents were found in the uploaded files"
        )

    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    texts = splitter.split_documents(docs)
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/multi-qa-mpnet-base-dot-v1"
    )
    return FAISS.from_documents(texts, embeddings)
33
+
34
def create_rag_chain(vectorstore, k=3):
    """Create a retrieval-augmented QA chain over ``vectorstore``.

    Args:
        vectorstore: Vector store whose ``as_retriever`` supplies the context
            documents.
        k: Number of chunks to retrieve per question. Defaults to 3, the
            value that was previously hard-coded.

    Returns:
        A ``RetrievalQA`` chain of type "stuff" that uses the module-level
        ``llm``.
    """
    retriever = vectorstore.as_retriever(search_kwargs={"k": k})
    return RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
    )
41
+
42
def chatbot(query, files):
    """Answer ``query`` using the uploaded ``files`` as the knowledge base.

    This is the Gradio click handler, so it never raises: every outcome is
    returned as a string for the response box.

    Args:
        query: The user's question.
        files: Uploaded files from the file component; may be empty or None.

    Returns:
        The chain's answer, or a Korean message asking for files / a
        question, or an error message prefixed with "오류: ".
    """
    if not files:
        return "파일 업로드 필요 (PDF/CSV/이미지/PPTX)."
    # Do not spend an indexing pass and an LLM call on a blank question.
    if not query or not query.strip():
        return "질문을 입력하세요."
    try:
        vectorstore = load_documents(files)
        qa_chain = create_rag_chain(vectorstore)
        return qa_chain.run(query)
    except Exception as e:
        # UI boundary: surface the failure to the user instead of crashing.
        return f"오류: {e}."
52
+
53
# Gradio UI: a question box and a multi-file upload feed `chatbot`, whose
# string result is shown in the response box. The Korean labels are restored
# from their mis-encoded form.
with gr.Blocks(title="Ericsson 장비 분석 챗봇") as demo:
    gr.Markdown("# 🚀 3G/LTE/5G 장비 불량/불요파 분석")
    gr.Markdown("PDF/CSV/이미지 업로드 후 질문: e.g., 'Spurious Emission 통계?'")
    query = gr.Textbox(label="질문 (한국어/영어)", placeholder="장애 원인 분석해줘")
    files = gr.File(label="파일 업로드", file_count="multiple")
    output = gr.Textbox(label="응답", lines=10)
    btn = gr.Button("분석 시작!")
    btn.click(chatbot, inputs=[query, files], outputs=output)

# Start the web server only when run as a script, not on import.
if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ transformers
3
+ torch
4
+ langchain
5
+ gradio
6
+ unstructured[all-docs]
7
+ faiss-cpu
8
+ peft
9
+ datasets
10
+ pandas
11
+ pytesseract
12
+ pillow
13
+ sentence-transformers
14
+ huggingface_hub