m00k10m committed on
Commit
3e9bb33
·
verified ·
1 Parent(s): 52a84f2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -69
app.py CHANGED
@@ -1,70 +1,71 @@
1
- import os
2
- import argparse
3
- from pathlib import Path
4
- from crawl import *
5
- from chatbot import *
6
-
7
-
8
- def main(args):
9
- # ํ—ˆ๊น…ํŽ˜์ด์Šค secret key๋กœ๋ถ€ํ„ฐ api key ์ฝ์–ด์˜ค๊ธฐ
10
- api_key = args.api_key
11
- if not api_key:
12
- api_key = os.environ.get("GROQ_API_KEY", "")
13
-
14
- if not api_key:
15
- print("API Key๊ฐ€ ์„ค์ •๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค. Hugging Face Secrets์—์„œ 'GROQ_API_KEY'๋ฅผ ์„ค์ •ํ•˜๊ฑฐ๋‚˜ --api_key ์ธ์ž๋ฅผ ์ž…๋ ฅํ•ด์ฃผ์„ธ์š”.")
16
-
17
-
18
- # ํฌ๋กค๋Ÿฌ ํŒŒํŠธ
19
- abs_download_path = os.path.join(args.base_dir, args.download_dir)
20
- abs_db_path = os.path.join(args.base_dir, args.db_dir)
21
-
22
- collection = make_db(abs_download_path, abs_db_path, args.collection_name)
23
- # ๊ธฐ๋ณธ ์ž„๋ฒ ๋”ฉ ํ•จ์ˆ˜ ์™ธ์˜ ํ•จ์ˆ˜๋ฅผ ์ด์šฉํ•  ๊ฒฝ์šฐ
24
- #collection = make_db(abs_download_path, abs_db_path, args.collection_name, embedf_name = args.embedf_name)
25
-
26
- crawl_seoultech_notice(abs_download_path, args.base_url, args.num_page, collection)
27
-
28
- # ์ฑ—๋ด‡ ํŒŒํŠธ
29
- collection = get_chroma_collection(abs_db_path, args.collection_name)
30
- # embedding function๋กœ ๋‹ค๋ฅธ ๋ชจ๋ธ์„ ์‚ฌ์šฉํ•  ๊ฒฝ์šฐ
31
- # collection = get_chroma_collection(abs_db_path, args.collection_name, embedf_name = args.embedf_name)
32
-
33
- if collection is None:
34
- print("Chromadb Collection์„ ๋ถˆ๋Ÿฌ์˜ค์ง€ ๋ชปํ–ˆ์Šต๋‹ˆ๋‹ค. ํ”„๋กœ๊ทธ๋žจ์„ ์ข…๋ฃŒํ•ฉ๋‹ˆ๋‹ค. ")
35
- return
36
-
37
- # ์‹œ์Šคํ…œ ํ”„๋กฌํ”„ํŠธ ๋ถˆ๋Ÿฌ์˜ค๊ธฐ
38
- system_prompt = get_system_prompt(args.prompt_type)
39
-
40
- # ์ฑ—๋ด‡ ์‹คํ–‰
41
- chat_with_rag(api_key = args.api_key,
42
- collection = collection,
43
- system_prompt = system_prompt,
44
- args = args)
45
-
46
-
47
- if __name__ == "__main__":
48
- parser = argparse.ArgumentParser()
49
- # ๊ณตํ†ต ์ธ์ž
50
- parser.add_argument("--base_dir", type = str, default = str(Path(__file__).resolve().parent)) # ํ˜„์žฌ ์ด ํŒŒ์ผ์ด ์žˆ๋Š” ๋””๋ ‰ํ† ๋ฆฌ
51
- parser.add_argument("--db_dir", type = str, default = "seoultech_data_db")
52
- parser.add_argument("--collection_name", type = str, default = "seoultech_notices")
53
- parser.add_argument("--embedf_name", type = str, default = "BAAI/bge-m3")
54
-
55
- # ํฌ๋กค๋Ÿฌ
56
- parser.add_argument("--base_url", type = str, default = "https://www.seoultech.ac.kr/service/info/notice")
57
- parser.add_argument("--download_dir", type = str, default = "seoultech_data_download")
58
- parser.add_argument("--header", type = dict, default = {"User-Agent" : "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"})
59
- parser.add_argument("--num_page", type = int, default = 1)
60
-
61
- # ์ฑ—๋ด‡
62
- parser.add_argument("--api_key", type = str, default = "")
63
- parser.add_argument("--log_dir", type = str, default = "chat_log")
64
- parser.add_argument("--model_name", type = str, default = "llama-3.3-70b-versatile") # llama-3.1-8b-instant llama-3.3-70b-versatile openai/gpt-oss-120b
65
- parser.add_argument("--temperature", type = float, default = 0.5)
66
- parser.add_argument("--n_results", type = int, default = 3)
67
- parser.add_argument("--prompt_type", type = str, default = "v")
68
-
69
- args = parser.parse_args()
 
70
  main(args)
 
1
+ import os
2
+ import argparse
3
+ from pathlib import Path
4
+ from crawl import *
5
+ from chatbot import *
6
+
7
+
8
+ def main(args):
9
+ # ํ—ˆ๊น…ํŽ˜์ด์Šค secret key๋กœ๋ถ€ํ„ฐ api key ์ฝ์–ด์˜ค๊ธฐ
10
+ api_key = args.api_key
11
+ if not api_key:
12
+ api_key = str(os.environ.get("GROQ_API_KEY", ""))
13
+ print(f"Groq API Key๋ฅผ ์„ฑ๊ณต์ ์œผ๋กœ ๋ถˆ๋Ÿฌ์™”์Šต๋‹ˆ๋‹ค. ๋’ท์ž๋ฆฌ 4๊ธ€์ž : ...{api_key[-4:]}")
14
+
15
+ if not api_key:
16
+ print("API Key๊ฐ€ ์„ค์ •๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค. Hugging Face Secrets์—์„œ 'GROQ_API_KEY'๋ฅผ ์„ค์ •ํ•˜๊ฑฐ๋‚˜ --api_key ์ธ์ž๋ฅผ ์ž…๋ ฅํ•ด์ฃผ์„ธ์š”.")
17
+
18
+
19
+ # ํฌ๋กค๋Ÿฌ ํŒŒํŠธ
20
+ abs_download_path = os.path.join(args.base_dir, args.download_dir)
21
+ abs_db_path = os.path.join(args.base_dir, args.db_dir)
22
+
23
+ collection = make_db(abs_download_path, abs_db_path, args.collection_name)
24
+ # ๊ธฐ๋ณธ ์ž„๋ฒ ๋”ฉ ํ•จ์ˆ˜ ์™ธ์˜ ํ•จ์ˆ˜๋ฅผ ์ด์šฉํ•  ๊ฒฝ์šฐ
25
+ #collection = make_db(abs_download_path, abs_db_path, args.collection_name, embedf_name = args.embedf_name)
26
+
27
+ crawl_seoultech_notice(abs_download_path, args.base_url, args.num_page, collection)
28
+
29
+ # ์ฑ—๋ด‡ ํŒŒํŠธ
30
+ collection = get_chroma_collection(abs_db_path, args.collection_name)
31
+ # embedding function๋กœ ๋‹ค๋ฅธ ๋ชจ๋ธ์„ ์‚ฌ์šฉํ•  ๊ฒฝ์šฐ
32
+ # collection = get_chroma_collection(abs_db_path, args.collection_name, embedf_name = args.embedf_name)
33
+
34
+ if collection is None:
35
+ print("Chromadb Collection์„ ๋ถˆ๋Ÿฌ์˜ค์ง€ ๋ชปํ–ˆ์Šต๋‹ˆ๋‹ค. ํ”„๋กœ๊ทธ๋žจ์„ ์ข…๋ฃŒํ•ฉ๋‹ˆ๋‹ค. ")
36
+ return
37
+
38
+ # ์‹œ์Šคํ…œ ํ”„๋กฌํ”„ํŠธ ๋ถˆ๋Ÿฌ์˜ค๊ธฐ
39
+ system_prompt = get_system_prompt(args.prompt_type)
40
+
41
+ # ์ฑ—๋ด‡ ์‹คํ–‰
42
+ chat_with_rag(api_key = args.api_key,
43
+ collection = collection,
44
+ system_prompt = system_prompt,
45
+ args = args)
46
+
47
+
48
if __name__ == "__main__":
    import json  # local import: only needed to parse a user-supplied --header

    def _parse_header(text):
        """Convert a --header value given as a JSON object string into a dict."""
        try:
            header = json.loads(text)
        except json.JSONDecodeError as err:
            raise argparse.ArgumentTypeError(
                f"--header must be a JSON object: {err}"
            ) from err
        if not isinstance(header, dict):
            raise argparse.ArgumentTypeError("--header must be a JSON object")
        return header

    parser = argparse.ArgumentParser()

    # Common arguments
    # base_dir defaults to the directory that contains this file.
    parser.add_argument("--base_dir", type=str, default=str(Path(__file__).resolve().parent))
    parser.add_argument("--db_dir", type=str, default="seoultech_data_db")
    parser.add_argument("--collection_name", type=str, default="seoultech_notices")
    parser.add_argument("--embedf_name", type=str, default="BAAI/bge-m3")

    # Crawler
    parser.add_argument("--base_url", type=str, default="https://www.seoultech.ac.kr/service/info/notice")
    parser.add_argument("--download_dir", type=str, default="seoultech_data_download")
    # argparse calls ``type`` with the raw string, and ``dict("...")`` raises
    # ValueError, so ``type=dict`` rejected every user-supplied value. A JSON
    # object string is accepted instead; the default is unchanged.
    parser.add_argument(
        "--header",
        type=_parse_header,
        default={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"},
    )
    parser.add_argument("--num_page", type=int, default=1)

    # Chatbot
    parser.add_argument("--api_key", type=str, default="")
    parser.add_argument("--log_dir", type=str, default="chat_log")
    # Model names noted by the author:
    # llama-3.1-8b-instant, llama-3.3-70b-versatile, openai/gpt-oss-120b
    parser.add_argument("--model_name", type=str, default="llama-3.3-70b-versatile")
    parser.add_argument("--temperature", type=float, default=0.5)
    parser.add_argument("--n_results", type=int, default=3)
    parser.add_argument("--prompt_type", type=str, default="v")

    args = parser.parse_args()
    main(args)