# ChaTech / app.py
# m00k10m's picture
# Update app.py
# 1388133 verified
import os
import argparse
from pathlib import Path
from crawl import *
from chatbot import *
def main(args):
    """Run the full pipeline: build the DB, crawl notices, then start the chatbot.

    Args:
        args: Parsed command-line namespace. This function reads ``api_key``,
            ``base_dir``, ``download_dir``, ``db_dir``, ``collection_name``,
            ``base_url``, ``num_page`` and ``prompt_type``; the whole namespace
            is also forwarded to ``chat_with_rag``.

    Returns:
        None. Returns early (without starting the chatbot) when
        ``get_chroma_collection`` returns ``None``.
    """
    # Prefer the --api_key argument; otherwise read the GROQ_API_KEY
    # environment variable (the Hugging Face secret mentioned in the message).
    api_key = args.api_key or os.environ.get("GROQ_API_KEY", "")
    if api_key:
        # Report success only when a key was actually found; only the last
        # four characters are echoed.
        print(f"Groq API Key๋ฅผ ์„ฑ๊ณต์ ์œผ๋กœ ๋ถˆ๋Ÿฌ์™”์Šต๋‹ˆ๋‹ค. ๋’ท์ž๋ฆฌ 4๊ธ€์ž : ...{api_key[-4:]}")
    else:
        # NOTE(review): execution continues with an empty key, as before, so
        # the DB/crawl steps still run — confirm whether this should abort.
        print("API Key๊ฐ€ ์„ค์ •๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค. Hugging Face Secrets์—์„œ 'GROQ_API_KEY'๋ฅผ ์„ค์ •ํ•˜๊ฑฐ๋‚˜ --api_key ์ธ์ž๋ฅผ ์ž…๋ ฅํ•ด์ฃผ์„ธ์š”.")

    # Crawler part
    abs_download_path = os.path.join(args.base_dir, args.download_dir)
    abs_db_path = os.path.join(args.base_dir, args.db_dir)
    collection = make_db(abs_download_path, abs_db_path, args.collection_name)
    # When using an embedding function other than the default one:
    # collection = make_db(abs_download_path, abs_db_path, args.collection_name, embedf_name = args.embedf_name)
    crawl_seoultech_notice(abs_download_path, args.base_url, args.num_page, collection)

    # Chatbot part
    collection = get_chroma_collection(abs_db_path, args.collection_name)
    # When using a different model as the embedding function:
    # collection = get_chroma_collection(abs_db_path, args.collection_name, embedf_name = args.embedf_name)
    if collection is None:
        print("Chromadb Collection์„ ๋ถˆ๋Ÿฌ์˜ค์ง€ ๋ชปํ–ˆ์Šต๋‹ˆ๋‹ค. ํ”„๋กœ๊ทธ๋žจ์„ ์ข…๋ฃŒํ•ฉ๋‹ˆ๋‹ค. ")
        return

    # Load the system prompt
    system_prompt = get_system_prompt(args.prompt_type)

    # Run the chatbot
    chat_with_rag(
        api_key=api_key,
        collection=collection,
        system_prompt=system_prompt,
        args=args,
    )
def parse_header_arg(raw):
    """Convert the ``--header`` command-line string into a dict.

    ``type=dict`` cannot work with argparse: it calls ``dict("<text>")`` on the
    raw string, which always raises. The value is parsed as a JSON object
    instead, e.g. ``--header '{"User-Agent": "my-agent"}'``.

    Args:
        raw: The raw command-line string.

    Returns:
        The decoded JSON object as a ``dict``.

    Raises:
        argparse.ArgumentTypeError: If ``raw`` is not valid JSON or does not
            decode to a JSON object.
    """
    import json  # function-scope import: only this converter needs it

    try:
        parsed = json.loads(raw)
    except ValueError as err:  # json.JSONDecodeError subclasses ValueError
        raise argparse.ArgumentTypeError(f"--header must be a JSON object: {err}") from err
    if not isinstance(parsed, dict):
        raise argparse.ArgumentTypeError("--header must be a JSON object (key/value pairs)")
    return parsed


if __name__ == "__main__":
    parser = argparse.ArgumentParser()

    # Common arguments
    # base_dir defaults to the directory that contains this file.
    parser.add_argument("--base_dir", type=str, default=str(Path(__file__).resolve().parent))
    parser.add_argument("--db_dir", type=str, default="seoultech_data_db")
    parser.add_argument("--collection_name", type=str, default="seoultech_notices")
    parser.add_argument("--embedf_name", type=str, default="BAAI/bge-m3")

    # Crawler
    parser.add_argument("--base_url", type=str, default="https://www.seoultech.ac.kr/service/info/notice")
    parser.add_argument("--download_dir", type=str, default="seoultech_data_download")
    # The default is already a dict, so argparse does not pass it through the
    # converter; only values given on the command line are JSON-decoded.
    parser.add_argument(
        "--header",
        type=parse_header_arg,
        default={"User-Agent" : "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"},
    )
    parser.add_argument("--num_page", type=int, default=1)

    # Chatbot
    parser.add_argument("--api_key", type=str, default="")
    parser.add_argument("--log_dir", type=str, default="chat_log")
    # Other model names listed by the author:
    # llama-3.1-8b-instant, llama-3.3-70b-versatile, openai/gpt-oss-120b
    parser.add_argument("--model_name", type=str, default="llama-3.3-70b-versatile")
    parser.add_argument("--temperature", type=float, default=0.5)
    parser.add_argument("--n_results", type=int, default=3)
    parser.add_argument("--prompt_type", type=str, default="v")

    args = parser.parse_args()
    main(args)