- SQL_Example.txt +65 -0
- app.py +9 -0
- core/cronjob.py +196 -0
- core/database.py +34 -0
- core/dependencies.py +39 -0
- core/models.py +15 -0
- requirements.txt +9 -1
- router/image_embedding_router.py +35 -0
- test_gemini.py +43 -0
- test_image_embedding.py +34 -0
- test_rss.py +11 -0
SQL_Example.txt
CHANGED
|
@@ -20,4 +20,69 @@ CREATE INDEX idx_content_embedding ON t_test_textembedding USING hnsw (content_e
|
|
| 20 |
|
| 21 |
--------------------------------
|
| 22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
|
|
|
|
|
|
| 20 |
|
| 21 |
--------------------------------
|
| 22 |
|
| 23 |
+
-- 1. pgvector ํ์ฅ์ด ์๋ค๋ฉด ๋จผ์ ์์ฑํด์ผ ํฉ๋๋ค.
|
| 24 |
+
CREATE EXTENSION IF NOT EXISTS vector;
|
| 25 |
+
|
| 26 |
+
-- 2. ํ
์ด๋ธ ์์ฑ
|
| 27 |
+
CREATE TABLE t_test_imgembedding (
|
| 28 |
+
id BIGSERIAL PRIMARY KEY, -- PK (์๋ ์ฆ๊ฐ)
|
| 29 |
+
title VARCHAR ,
|
| 30 |
+
url VARCHAR ,
|
| 31 |
+
mimetype VARCHAR ,
|
| 32 |
+
img_embedding VECTOR(1280), -- image embedding (1280 dimensions)
|
| 33 |
+
created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP -- ์์ฑ์ผ
|
| 34 |
+
);
|
| 35 |
+
|
| 36 |
+
-- 3. (์ ํ) ๋ฒกํฐ ๊ฒ์ ์ฑ๋ฅ์ ๋์ด๊ธฐ ์ํ ์ธ๋ฑ์ค ์์ฑ (HNSW ์๊ณ ๋ฆฌ์ฆ, ์ฝ์ฌ์ธ ์ ์ฌ๋ ๊ธฐ์ค)
|
| 37 |
+
CREATE INDEX idx_test_imgembedding ON t_test_imgembedding USING hnsw (img_embedding vector_cosine_ops);
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
----------------------------------
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
// title_embedding_arr, content_embedding_arr๋
|
| 46 |
+
// ONNX ๋ชจ๋ธ์์ ์ถ์ถํ 768๊ฐ์ ์ซ์๊ฐ ๋ด๊ธด ๋ฐฐ์ด(Array)์
๋๋ค.
|
| 47 |
+
|
| 48 |
+
let insertData = await db.query(
|
| 49 |
+
`
|
| 50 |
+
INSERT INTO t_test_textembedding (title, title_embedding, content, content_embedding)
|
| 51 |
+
VALUES ($1, $2, $3, $4)
|
| 52 |
+
RETURNING id, title, created_at;
|
| 53 |
+
`,
|
| 54 |
+
[
|
| 55 |
+
title,
|
| 56 |
+
JSON.stringify(title_embedding_arr), // DB ๋๋ผ์ด๋ฒ ํธํ์ฑ์ ์ํด ๋ฌธ์์ด ํฌ๋งท '[...]' ์ผ๋ก ๋ณํ
|
| 57 |
+
content,
|
| 58 |
+
JSON.stringify(content_embedding_arr)
|
| 59 |
+
]
|
| 60 |
+
);
|
| 61 |
+
|
| 62 |
+
console.log('์์ฑ๋ ๋ฐ์ดํฐ:', insertData.rows[0]);
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
-----------------------------------------
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
// query_embedding_arr๋ ์ฌ์ฉ์์ ๊ฒ์์ด๋ฅผ ONNX ๋ชจ๋ธ์ ๋๋ ค ๋์จ ์๋ฒ ๋ฉ ๋ฐฐ์ด์
๋๋ค.
|
| 70 |
+
|
| 71 |
+
let searchResult = await db.query(
|
| 72 |
+
`
|
| 73 |
+
SELECT
|
| 74 |
+
id,
|
| 75 |
+
title,
|
| 76 |
+
content,
|
| 77 |
+
-- ์ฝ์ฌ์ธ ๊ฑฐ๋ฆฌ๋ 0์ ๊ฐ๊น์ธ์๋ก ์ ์ฌํ๋ฏ๋ก, ์ง๊ด์ ์ธ '์ ์ฌ๋ ์ ์'๋ฅผ ์ํด 1์์ ๋บ๋๋ค.
|
| 78 |
+
1 - (content_embedding <=> $1) AS similarity_score
|
| 79 |
+
FROM t_test_textembedding
|
| 80 |
+
-- ์ ์ฌ๋ ์๊ณ๊ฐ ์ค์ (์: ์ ํ๋ ์ ์๊ฐ 0.5 ์ด์์ธ ๊ฒ๋ง)
|
| 81 |
+
WHERE 1 - (content_embedding <=> $1) > 0.5
|
| 82 |
+
ORDER BY content_embedding <=> $1 ASC
|
| 83 |
+
LIMIT 5;
|
| 84 |
+
`,
|
| 85 |
+
[JSON.stringify(query_embedding_arr)]
|
| 86 |
+
);
|
| 87 |
|
| 88 |
+
console.log('์ ์ฌํ ๋ฐ์ดํฐ ๋ชฉ๋ก:', searchResult.rows);
|
app.py
CHANGED
|
@@ -5,6 +5,8 @@ from contextlib import asynccontextmanager
|
|
| 5 |
|
| 6 |
from router import llamindex_router
|
| 7 |
from router import embedding_router
|
|
|
|
|
|
|
| 8 |
|
| 9 |
@asynccontextmanager
|
| 10 |
async def lifespan_manager(app: FastAPI):
|
|
@@ -12,9 +14,15 @@ async def lifespan_manager(app: FastAPI):
|
|
| 12 |
์๋ฒ ์์ ์ ๋ชจ๋ธ์ ๋ก๋ํ๊ณ ์ข
๋ฃ ์ ์ ๋ฆฌํฉ๋๋ค.
|
| 13 |
"""
|
| 14 |
|
|
|
|
|
|
|
|
|
|
| 15 |
# ์๋ฒ๊ฐ ์์ฒญ ์ฒ๋ฆฌ๋ฅผ ์์ํ๋๋ก ์ ์ด๊ถ์ ๋๊ฒจ์ค๋๋ค.
|
| 16 |
yield
|
| 17 |
|
|
|
|
|
|
|
|
|
|
| 18 |
# FastAPI ์ ํ๋ฆฌ์ผ์ด์
์ด๊ธฐํ
|
| 19 |
app = FastAPI(
|
| 20 |
title="RAG+LLM",
|
|
@@ -33,6 +41,7 @@ app.add_middleware(
|
|
| 33 |
|
| 34 |
app.include_router(llamindex_router.router, prefix="/llama_index")
|
| 35 |
app.include_router(embedding_router.router, prefix="/embedding")
|
|
|
|
| 36 |
|
| 37 |
# ํฌ์ค ์ฒดํฌ์ฉ ๊ธฐ๋ณธ ์๋ํฌ์ธํธ
|
| 38 |
@app.get("/", summary="API ํฌ์ค ์ฒดํฌ")
|
|
|
|
| 5 |
|
| 6 |
from router import llamindex_router
|
| 7 |
from router import embedding_router
|
| 8 |
+
from router import image_embedding_router
|
| 9 |
+
from core.cronjob import news_scheduler
|
| 10 |
|
| 11 |
@asynccontextmanager
|
| 12 |
async def lifespan_manager(app: FastAPI):
|
|
|
|
| 14 |
์๋ฒ ์์ ์ ๋ชจ๋ธ์ ๋ก๋ํ๊ณ ์ข
๋ฃ ์ ์ ๋ฆฌํฉ๋๋ค.
|
| 15 |
"""
|
| 16 |
|
| 17 |
+
# ์ค์ผ์ค๋ฌ ์์
|
| 18 |
+
#news_scheduler.start()
|
| 19 |
+
|
| 20 |
# ์๋ฒ๊ฐ ์์ฒญ ์ฒ๋ฆฌ๋ฅผ ์์ํ๋๋ก ์ ์ด๊ถ์ ๋๊ฒจ์ค๋๋ค.
|
| 21 |
yield
|
| 22 |
|
| 23 |
+
# ์ค์ผ์ค๋ฌ ์ข
๋ฃ
|
| 24 |
+
#news_scheduler.shutdown()
|
| 25 |
+
|
| 26 |
# FastAPI ์ ํ๋ฆฌ์ผ์ด์
์ด๊ธฐํ
|
| 27 |
app = FastAPI(
|
| 28 |
title="RAG+LLM",
|
|
|
|
| 41 |
|
| 42 |
app.include_router(llamindex_router.router, prefix="/llama_index")
|
| 43 |
app.include_router(embedding_router.router, prefix="/embedding")
|
| 44 |
+
app.include_router(image_embedding_router.router, prefix="/image_embedding")
|
| 45 |
|
| 46 |
# ํฌ์ค ์ฒดํฌ์ฉ ๊ธฐ๋ณธ ์๋ํฌ์ธํธ
|
| 47 |
@app.get("/", summary="API ํฌ์ค ์ฒดํฌ")
|
core/cronjob.py
ADDED
|
@@ -0,0 +1,196 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from apscheduler.triggers.cron import CronTrigger
from apscheduler.triggers.interval import IntervalTrigger
from datetime import datetime
from time import mktime
import urllib.parse
import asyncio
import os

import feedparser
import pytz
import requests
# FIX: BeautifulSoup is used in the article-text fallback inside
# fetch_filter_and_save_news but was never imported, so that code path
# raised NameError at runtime.
from bs4 import BeautifulSoup
from newspaper import Article
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.messages import HumanMessage

# Project-local imports: DB session factory, ORM model, embedding singleton.
from core.database import SessionLocal
from core.models import NewsEmbedding
from core.dependencies import get_embedding_model

# Gemini LLM used to verify candidate articles and produce 1-2 line summaries.
llm = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash-lite",
    temperature=0.1,
    google_api_key=os.getenv("GOOGLE_API_KEY"),
)
| 27 |
+
|
| 28 |
+
# Heuristic headline filter: reject known false positives for the word "gold",
# then accept only titles that contain a gold/market-related keyword.
def is_real_gold_news(title):
    """Return True when *title* looks like a genuine gold-market headline.

    Blacklist is checked first and wins: a title containing any banned word
    is rejected even if it also contains a whitelisted word.
    """
    lowered = title.lower()
    banned = (
        "๊ธ์์ผ", "๋ณด์กฐ๊ธ", "์ฅํ๊ธ", "์ง์๊ธ", "๋ฒ๊ธ", "์ถ๊ธ", "์๊ธ",
        "๊ธ์ง", "์ก๊ธ", "๋์ถ๊ธ", "๋ชจ๊ธ", "๊ธฐ๊ธ", "๊ณผ์ง๊ธ",
        "golden retriever", "golden state", "golden globe", "golden rule", "marigold",
    )
    wanted = (
        "์จ์ค", "๊ณจ๋๋ฐ", "์์ธ", "์๊ธ", "๊ฑฐ๋์", "๋ฌ๋ฌ", "ํฌ์", "๊ธ๊ฐ",
        "ํ๊ตญ๊ธ๊ฑฐ๋์", "krx", "๊ธํ๋", "ounce", "bullion", "price", "market",
        "fed", "inflation", "xau", "spot", "invest",
    )

    # Blacklist first: any banned word disqualifies the title outright.
    if any(word in lowered for word in banned):
        return False
    # Otherwise require at least one positive signal.
    return any(word in lowered for word in wanted)
|
| 37 |
+
|
| 38 |
+
# 2. Fetch the Google News RSS feed for *keyword*, filter candidate titles,
#    scrape + summarize each article, and persist new rows with embeddings.
def fetch_filter_and_save_news(keyword, hl, gl, ceid, db_session, max_news=3):
    """Crawl Google News RSS for *keyword* and save up to *max_news* new articles.

    Args:
        keyword: search term; URL-quoted before being placed in the feed URL.
        hl, gl, ceid: Google News locale parameters.
        db_session: SQLAlchemy session; committed once at the end if rows were added.
        max_news: cap on the number of articles saved per run.

    Returns:
        List of (title, link) tuples for the articles added this run.
    """
    url_keyword = urllib.parse.quote(keyword)
    rss_url = f"https://news.google.com/rss/search?q={url_keyword}&hl={hl}&gl={gl}&ceid={ceid}"

    feed = feedparser.parse(rss_url)
    valid_news = []

    # Shared singleton embedding model from core.dependencies.
    embedder = get_embedding_model()

    for entry in feed.entries:
        if len(valid_news) >= max_news:
            break

        # Cheap title-level heuristic filter first.
        if not is_real_gold_news(entry.title):
            continue

        # Dedup: skip articles whose exact title is already stored.
        exists = db_session.query(NewsEmbedding).filter(NewsEmbedding.title == entry.title).first()
        if exists:
            continue

        # Publication date from the feed when available, else "now" (UTC).
        if hasattr(entry, 'published_parsed') and entry.published_parsed:
            pub_date = datetime.fromtimestamp(mktime(entry.published_parsed), pytz.UTC)
        else:
            pub_date = datetime.now(pytz.UTC)

        try:
            # Browser-like headers so news sites serve the real page.
            headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36',
                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
                'Accept-Language': 'ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7',
                'Cache-Control': 'no-cache',
                'Pragma': 'no-cache',
            }

            # 1. Follow redirects to the article's final URL.
            response = requests.get(entry.link, timeout=15, headers=headers, allow_redirects=True)
            real_url = response.url

            # 2. Extract the article body with newspaper3k.
            from newspaper import Config
            config = Config()
            config.browser_user_agent = headers['User-Agent']
            config.request_timeout = 15

            article = Article(real_url, config=config)
            article.download()
            article.parse()

            article_text = article.text.strip()

            # Fallback: newspaper got nothing, or only the Google News shell —
            # scrape <p> tags directly from the fetched HTML.
            if len(article_text) < 100 or "Google News" in article_text[:100]:
                # FIX: BeautifulSoup was referenced here without ever being
                # imported, so this fallback raised NameError. Local import
                # keeps this edit self-contained.
                from bs4 import BeautifulSoup
                soup = BeautifulSoup(response.text, 'html.parser')
                article_text = ' '.join([p.text for p in soup.find_all('p') if len(p.text) > 20])

            if len(article_text) < 100:
                print(f"โ ๏ธ ๋ณธ๋ฌธ ์ถ์ถ ์คํจ (๋ด์ฉ ๋ถ์กฑ): {entry.title}")
                continue

            # Debug output while tuning the scraper.
            print(f"\n[๊ธฐ์ฌ ์ ๋ชฉ]: {entry.title}")
            print(f"[์ค์ ์ฃผ์]: {real_url}")
            print(f"[๋ณธ๋ฌธ ๋ฏธ๋ฆฌ๋ณด๊ธฐ]:\n{article_text[:300]}...\n")
            print("-" * 50)

            # NOTE(review): deliberate guard left by the author — everything
            # below (LLM call, embeddings, DB insert) is skipped until testing
            # finishes, to avoid burning API quota. Remove this `continue`
            # to enable the full pipeline.
            continue

            prompt = f"""
            ๋ค์์ ๋ด์ค ๊ธฐ์ฌ ์๋ฌธ์๋๋ค:
            {article_text[:3000]}

            ์ด ๋ด์ค๊ฐ ๊ธ(Gold, ๊ท๊ธ์/ํฌ์์์ฐ/๊ธ๊ฐ)๊ณผ ๊ด๋ จ๋ ์ค์ ๋ด์ค์ธ์ง ํ๋ณํ๊ณ ,
            ๋ง๋ค๋ฉด ๊ธฐ์ฌ์ ํต์ฌ ๋ด์ฉ์ 1~2์ค๋ก ์์ฝํด์ฃผ์ธ์.
            ๋ง์ฝ ๊ธ๊ณผ ์ ํ ๊ด๋ จ์ด ์๋ ๋ด์ค๋ผ๋ฉด (์: ๊ธ์์ผ, ๋ฒ๊ธ, ์ฅํ๊ธ, ์ถ๊ธ, ์ก๊ธ, ๋ณด์กฐ๊ธ, ๋จ์ํ '๊ธ'์ด ํฌํจ๋ ๋จ์ด๋ง ์๋ ๊ธฐ์ฌ ๋ฑ)
            'NOT_GOLD_NEWS' ๋ผ๊ณ ๋ง ์ ํํ ๋ต๋ณํ์ธ์.

            ์์ฝ ๊ฒฐ๊ณผ:
            """
            response = llm.invoke([HumanMessage(content=prompt)])
            summary = response.content.strip()

            if summary == "NOT_GOLD_NEWS":
                print(f"โ [Gemini ํํฐ๋ง] ๊ฐ์ง ๊ธ ๋ด์ค ์คํต: {entry.title}")
                continue

            content_text = summary
            print(f"โ [Gemini ์์ฝ ์ฑ๊ณต] ์์ฝ๋ฌธ: {content_text}")

        except Exception as e:
            # Best-effort: a failed article is logged and skipped, not fatal.
            print(f"โ ๏ธ ๊ธฐ์ฌ ๋ณธ๋ฌธ ์ถ์ถ ๋๋ ์์ฝ ์คํจ ({entry.title}): {e}")
            continue

        # Embeddings come back as List[float] from the wrapper.
        title_emb = embedder.embed_query(entry.title)
        content_emb = embedder.embed_query(content_text)

        # Stage the new row on the session (committed once below).
        new_article = NewsEmbedding(
            title=entry.title,
            title_embedding=title_emb,
            content=content_text,
            content_embedding=content_emb,
            created_at=pub_date,
        )
        db_session.add(new_article)

        valid_news.append((entry.title, entry.link))
        print(f"โ DB ์ถ๊ฐ ์์ฝ: {entry.title}")

    # Single commit at the end so a run is all-or-nothing per batch.
    if valid_news:
        db_session.commit()

    return valid_news
|
| 162 |
+
|
| 163 |
+
# 3. Entry point for the scheduled job: opens its own DB session, runs the
#    KR and US feed searches, and always releases the session afterwards.
async def search_gold_news():
    """Fetch, filter, and persist gold-related news from the KR and US feeds."""
    print(f"\n=== ๐ [{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] ์๋ ๊ฒ์ ๋ฐ DB ์ ์ฅ ์์ ===")

    session = SessionLocal()
    try:
        print("\n[๊ตญ๋ด ๋ด์ค ๊ฒ์ ์ค...]")
        if not fetch_filter_and_save_news("๊ธ", "ko", "KR", "KR:ko", session, max_news=3):
            print("์๋ก์ด ๊ตญ๋ด ๋ด์ค๊ฐ ์๊ฑฐ๋ ๋ชจ๋ ์ด๋ฏธ ์ ์ฅ๋ ๊ธฐ์ฌ์ผ.")

        print("\n[ํด์ธ ๋ด์ค ๊ฒ์ ์ค...]")
        if not fetch_filter_and_save_news("gold", "en", "US", "US:en", session, max_news=3):
            print("์๋ก์ด ํด์ธ ๋ด์ค๊ฐ ์๊ฑฐ๋ ๋ชจ๋ ์ด๋ฏธ ์ ์ฅ๋ ๊ธฐ์ฌ์ผ.")

    except Exception as e:
        # Roll back any partially staged rows on failure.
        print(f"โ DB ์ ์ฅ ์ค ์๋ฌ ๋ฐ์: {e}")
        session.rollback()
    finally:
        # Release the session whether the run succeeded or not.
        session.close()

    print("==========================================\n")
|
| 188 |
+
|
| 189 |
+
# Scheduler factory: one AsyncIOScheduler pinned to the Seoul timezone.
def create_scheduler():
    """Build the news-crawl scheduler (currently firing every 60 seconds)."""
    sched = AsyncIOScheduler(timezone="Asia/Seoul")
    # Production cadence (daily at 18:00 KST), kept for reference:
    # sched.add_job(search_gold_news, CronTrigger(hour=18, minute=0))
    sched.add_job(search_gold_news, IntervalTrigger(seconds=60))
    return sched

# Module-level singleton imported by app.py's lifespan hooks.
news_scheduler = create_scheduler()
|
core/database.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker, declarative_base
import os
from dotenv import load_dotenv

# Load variables from a .env file when one is present.
load_dotenv()

# Database connection URL. It must come from the DATABASE_URL environment
# variable (or .env). FIX: the original comment claimed a hard-coded default
# existed, but os.getenv returns None when the variable is unset, and
# create_engine(None) then fails with a cryptic ArgumentError — fail fast
# with an explicit message instead.
SQLALCHEMY_DATABASE_URL = os.getenv("DATABASE_URL")
if not SQLALCHEMY_DATABASE_URL:
    raise RuntimeError(
        "DATABASE_URL environment variable is not set; "
        "configure it in the environment or a .env file."
    )

# pool_pre_ping=True validates pooled connections before use — cloud DBs
# (e.g. Neon) drop idle connections, so this avoids stale-connection errors.
engine = create_engine(
    SQLALCHEMY_DATABASE_URL,
    pool_pre_ping=True
)

# Session factory used by both the FastAPI dependency and the cron job.
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)

# Declarative base for ORM models (see core/models.py).
Base = declarative_base()

def get_db():
    """FastAPI dependency: yield a session and guarantee it is closed."""
    db = SessionLocal()
    try:
        yield db
    finally:
        db.close()
|
core/dependencies.py
CHANGED
|
@@ -70,8 +70,38 @@ class OnnxGemmaWrapper(Embeddings):
|
|
| 70 |
def embed_query(self, text: str) -> List[float]:
|
| 71 |
return self.encode_query(text).tolist()
|
| 72 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
# ์ ์ญ ์ฑ๊ธํค ์ธ์คํด์ค ์ ์ฅ์
|
| 74 |
_embedding_model = None
|
|
|
|
| 75 |
|
| 76 |
def get_embedding_model() -> OnnxGemmaWrapper:
|
| 77 |
"""
|
|
@@ -84,3 +114,12 @@ def get_embedding_model() -> OnnxGemmaWrapper:
|
|
| 84 |
token=hf_token
|
| 85 |
)
|
| 86 |
return _embedding_model
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
def embed_query(self, text: str) -> List[float]:
|
| 71 |
return self.encode_query(text).tolist()
|
| 72 |
|
| 73 |
+
import torch
import torchvision.transforms as transforms
from torchvision.models import efficientnet_v2_s, EfficientNet_V2_S_Weights
from PIL import Image

# ... (existing OnnxGemmaWrapper and get_embedding_model)

class EfficientNetV2Embedding:
    """Image embedder built on a pretrained EfficientNetV2-S backbone.

    The classification head is replaced with Identity, so a forward pass
    yields the pooled feature vector instead of class logits.
    """

    def __init__(self):
        print("Loading EfficientNetV2-S model...")
        self.weights = EfficientNet_V2_S_Weights.DEFAULT
        self.model = efficientnet_v2_s(weights=self.weights)
        self.model.eval()

        # Expose raw features by dropping the classifier head.
        self.model.classifier = torch.nn.Identity()

        # Preprocessing pipeline that matches the pretrained weights.
        self.preprocess = self.weights.transforms()
        print("EfficientNetV2-S model loaded successfully.")

    def embed_image(self, image: Image.Image) -> List[float]:
        """Return the embedding of a PIL image as a plain Python list."""
        batch = self.preprocess(image).unsqueeze(0)  # add batch dimension

        with torch.no_grad():  # inference only — skip autograd bookkeeping
            features = self.model(batch)

        return features.squeeze(0).tolist()
|
| 101 |
+
|
| 102 |
# ์ ์ญ ์ฑ๊ธํค ์ธ์คํด์ค ์ ์ฅ์
|
| 103 |
_embedding_model = None
|
| 104 |
+
_image_embedding_model = None
|
| 105 |
|
| 106 |
def get_embedding_model() -> OnnxGemmaWrapper:
|
| 107 |
"""
|
|
|
|
| 114 |
token=hf_token
|
| 115 |
)
|
| 116 |
return _embedding_model
|
| 117 |
+
|
| 118 |
+
def get_image_embedding_model() -> EfficientNetV2Embedding:
    """Return the process-wide EfficientNetV2-S embedder, loading it on first use.

    Mirrors get_embedding_model: lazy one-time construction, then every call
    reuses the same instance.
    """
    global _image_embedding_model
    if _image_embedding_model is None:
        # First caller pays the model-load cost; later callers get the cache.
        _image_embedding_model = EfficientNetV2Embedding()
    return _image_embedding_model
|
core/models.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from sqlalchemy import Column, BigInteger, String, Text, DateTime
from pgvector.sqlalchemy import Vector
from core.database import Base

class NewsEmbedding(Base):
    """ORM mapping for the t_test_textembedding table (news + pgvector embeddings)."""
    __tablename__ = "t_test_textembedding"

    # Auto-incrementing primary key.
    id = Column(BigInteger, primary_key=True, autoincrement=True)
    title = Column(String(500), nullable=False)
    # 768 matches the Gemma embedding model's output dimension.
    title_embedding = Column(Vector(768))
    content = Column(Text, nullable=False)
    content_embedding = Column(Vector(768))
    # Timezone-aware publication/creation timestamp.
    created_at = Column(DateTime(timezone=True))
|
requirements.txt
CHANGED
|
@@ -20,4 +20,12 @@ langchain-community
|
|
| 20 |
langchain-huggingface
|
| 21 |
langchain-google-genai
|
| 22 |
|
| 23 |
-
onnxruntime
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
langchain-huggingface
|
| 21 |
langchain-google-genai
|
| 22 |
|
| 23 |
+
onnxruntime
|
| 24 |
+
apscheduler
|
| 25 |
+
feedparser
|
| 26 |
+
sqlalchemy
|
| 27 |
+
psycopg2-binary
|
| 28 |
+
pgvector
|
| 29 |
+
pytz
|
| 30 |
+
newspaper3k
|
| 31 |
+
lxml_html_clean
|
router/image_embedding_router.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from fastapi import APIRouter, HTTPException, UploadFile, File
from pydantic import BaseModel
from typing import List, Optional, Any
from io import BytesIO
from PIL import Image

# Singleton accessor from the core module (guarantees one model instance).
from core.dependencies import get_image_embedding_model

router = APIRouter(tags=["Image Embedding"])

class ImageEmbeddingResponse(BaseModel):
    """Uniform envelope: success flag, payload, and optional error message."""
    success: bool
    data: Optional[Any] = None
    msg: Optional[str] = None

@router.post("/image_to_embedding", response_model=ImageEmbeddingResponse)
async def image_to_embedding(file: UploadFile = File(...)):
    """Embed an uploaded image with EfficientNetV2-S and return the 1280-dim vector.

    Errors are reported in-band (success=False, msg) rather than as HTTP errors.
    """
    try:
        # Read the upload and normalize to RGB for the model.
        contents = await file.read()
        image = Image.open(BytesIO(contents)).convert("RGB")

        # FIX: the model was previously instantiated at module import time,
        # which loaded the torch model just by importing this router. Resolve
        # the singleton lazily here instead — same instance, same results,
        # but the cost is paid on first request, not at startup/import.
        model = get_image_embedding_model()
        emb_vector = model.embed_image(image)

        return {"success": True, "data": {"embedding": emb_vector}, "msg": ""}
    except Exception as e:
        return {"success": False, "data": None, "msg": str(e)}
|
test_gemini.py
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import feedparser
import requests
from bs4 import BeautifulSoup
import os
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.messages import HumanMessage

def test():
    """Smoke test: fetch one Google News entry, scrape it, ask Gemini to classify/summarize."""
    rss_url = "https://news.google.com/rss/search?q=%EA%B8%88&hl=ko&gl=KR&ceid=KR:ko"
    feed = feedparser.parse(rss_url)
    if not feed.entries:
        return
    entry = feed.entries[0]
    print("Link:", entry.link)

    try:
        # Fetch the article page and flatten it to plain text.
        page = requests.get(entry.link, timeout=10, headers={'User-Agent': 'Mozilla/5.0'})
        soup = BeautifulSoup(page.text, 'html.parser')
        text = soup.get_text(separator=' ', strip=True)
        print("Text preview:", text[:200])

        # Ask Gemini whether this is genuine gold news, and for a short summary.
        llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash", temperature=0.1)

        prompt = f"""
        ๋ค์์ ๋ด์ค ๊ธฐ์ฌ ์๋ฌธ์๋๋ค:
        {text[:3000]}

        ์ด ๋ด์ค๊ฐ ๊ธ(Gold, ๊ท๊ธ์/ํฌ์์์ฐ)๊ณผ ๊ด๋ จ๋ ์ค์ ๋ด์ค์ธ์ง ํ๋ณํ๊ณ ,
        ๋ง๋ค๋ฉด ๊ธฐ์ฌ์ ํต์ฌ ๋ด์ฉ์ 1~2์ค๋ก ์์ฝํด์ฃผ์ธ์.
        ๋ง์ฝ ๊ธ๊ณผ ์ ํ ๊ด๋ จ์ด ์๋ ๋ด์ค๋ผ๋ฉด (์: ๊ธ์์ผ, ์ก๊ธ, ์์ธ ์๋ ์ผ๋ฐ ๊ธฐ์ฌ ๋ฑ)
        'NOT_GOLD_NEWS' ๋ผ๊ณ ๋ง ์ ํํ ๋ต๋ณํ์ธ์.

        ์์ฝ ๊ฒฐ๊ณผ:
        """
        response = llm.invoke([HumanMessage(content=prompt)])
        print("\nGemini Response:", response.content)
    except Exception as e:
        print("Error:", e)

if __name__ == "__main__":
    test()
|
test_image_embedding.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import requests
import os

# Default sample image; override via the image_path parameter or TEST_IMAGE_PATH.
DEFAULT_IMAGE_PATH = r"C:\Users\itg\.gemini\antigravity\brain\a2d1bd2b-b329-461a-ab89-c0d64934f5fb\test_image_for_embedding_1772686600102.png"

def test_image_embedding(image_path=None):
    """POST an image to the embedding endpoint and verify a 1280-dim vector returns.

    FIX/generalization: the image path was hard-coded to one machine's absolute
    Windows path. It is now an optional parameter that falls back to the
    TEST_IMAGE_PATH environment variable and then to the original default, so
    the existing zero-argument call still behaves the same.
    """
    url = "http://localhost:8000/image_embedding/image_to_embedding"
    if image_path is None:
        image_path = os.getenv("TEST_IMAGE_PATH", DEFAULT_IMAGE_PATH)

    if not os.path.exists(image_path):
        print(f"Error: {image_path} not found.")
        return

    with open(image_path, "rb") as f:
        # Send only the basename as the upload filename, not the full local path.
        files = {"file": (os.path.basename(image_path), f, "image/png")}
        try:
            response = requests.post(url, files=files)
            if response.status_code == 200:
                result = response.json()
                if result["success"]:
                    embedding = result["data"]["embedding"]
                    print(f"Successfully retrieved embedding. Dimension: {len(embedding)}")
                    # EfficientNetV2-S embedding dimension should be 1280
                    if len(embedding) == 1280:
                        print("Verification PASSED: Embedding dimension is 1280.")
                    else:
                        print(f"Verification FAILED: Expected dimension 1280, got {len(embedding)}.")
                else:
                    print(f"API Error: {result['msg']}")
            else:
                print(f"HTTP Error: {response.status_code}")
        except Exception as e:
            print(f"Request failed: {e}")

if __name__ == "__main__":
    test_image_embedding()
|
test_rss.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import feedparser

# Quick manual check of the Google News RSS feed shape for a sample query.
rss_url = "https://news.google.com/rss/search?q=%EB%A7%88%EC%9D%B4%ED%81%AC%EB%A1%9C%EC%86%8C%ED%94%84%ED%8A%B8&hl=ko&gl=KR&ceid=KR:ko"
feed = feedparser.parse(rss_url)

if feed.entries:
    first = feed.entries[0]
    print(first.keys())
    print("Title:", first.title)
    print("Link:", first.link)
    print("Description:", first.description)
|