dgmos committed on
Commit
8ec8702
·
1 Parent(s): f2abe6e

Deploy chatbot update

Browse files
Files changed (1) hide show
  1. app.py +2 -3
app.py CHANGED
@@ -25,7 +25,7 @@ llm = HuggingFaceEndpoint(
25
  # 3. 대상 데이터셋 Repo 정보
26
  dataset_repo = "dgmos/ericsson-manuals"
27
 
28
- # 4. 처리할 PDF 파일 리스트 (데이터셋에 올라간 실제 파일명과 동일해야 함)
29
  pdf_files = [
30
  "(20220324) L2 Switch 운용 매뉴얼_Innovation TF_Ver3.1_OCR.pdf",
31
  "(20230504) 23년 기술교육 교재 1 (LTE)_가치혁신팀_OCR.pdf",
@@ -79,8 +79,7 @@ for fname in pdf_files:
79
  content = page.extract_text()
80
  if content:
81
  texts.append(content)
82
- text = "
83
- ".join(texts).strip()
84
  if text:
85
  docs.append({"page_content": text, "metadata": {"source": fname}})
86
  else:
 
25
  # 3. 대상 데이터셋 Repo 정보
26
  dataset_repo = "dgmos/ericsson-manuals"
27
 
28
+ # 4. 처리할 PDF 파일 리스트
29
  pdf_files = [
30
  "(20220324) L2 Switch 운용 매뉴얼_Innovation TF_Ver3.1_OCR.pdf",
31
  "(20230504) 23년 기술교육 교재 1 (LTE)_가치혁신팀_OCR.pdf",
 
79
  content = page.extract_text()
80
  if content:
81
  texts.append(content)
82
+ text = "\n".join(texts).strip() # 줄바꿈 이스케이프 처리 → 절대 자동 줄바꿈 안 생김
 
83
  if text:
84
  docs.append({"page_content": text, "metadata": {"source": fname}})
85
  else: