diff --git a/insta-lab/app/news_collector.py b/insta-lab/app/news_collector.py
new file mode 100644
index 0000000..94acda7
--- /dev/null
+++ b/insta-lab/app/news_collector.py
@@ -0,0 +1,82 @@
+"""NAVER 뉴스 검색 API 연동 — 카테고리별 시드 키워드로 일일 수집."""
+
+import html
+import logging
+import re
+from typing import Any, Dict, List, Optional
+
+import requests
+
+from .config import NAVER_CLIENT_ID, NAVER_CLIENT_SECRET, NEWS_PER_CATEGORY
+from . import db
+
+logger = logging.getLogger(__name__)  # module-level logger named after this module
+
+NEWS_URL = "https://openapi.naver.com/v1/search/news.json"  # endpoint queried by search_news
+_HEADERS = {  # credential headers sent with every search request; values come from .config
+ "X-Naver-Client-Id": NAVER_CLIENT_ID,
+ "X-Naver-Client-Secret": NAVER_CLIENT_SECRET,
+}
+_TAG_RE = re.compile(r"<[^>]+>")  # matches a single <...> tag; _clean uses it to strip markup
+
+
+def _clean(text: str) -> str:
+    """Drop HTML tags, decode entities, and trim whitespace; falsy input yields ''."""
+    if text:
+        return html.unescape(_TAG_RE.sub("", text)).strip()
+    return ""
+
+
+def search_news(keyword: str, display: int = 30, sort: str = "date") -> List[Dict[str, Any]]:
+    """Run a single NAVER news.json search and return the cleaned items.
+
+    ``display`` is clamped to 1..100, the range the NAVER search API accepts;
+    out-of-range values would otherwise fail the whole request.
+
+    Returns: list of {title, link, summary, pub_date}; ``link`` falls back to
+    ``originallink`` and is "" when neither field is usable.
+    Raises: whatever ``requests.get`` / ``raise_for_status`` / ``resp.json`` raise.
+    """
+    params = {"query": keyword, "display": min(max(display, 1), 100), "sort": sort}
+    resp = requests.get(NEWS_URL, headers=_HEADERS, params=params, timeout=10)
+    resp.raise_for_status()
+    return [
+        {
+            "title": _clean(item.get("title", "")),
+            "link": item.get("link") or item.get("originallink") or "",
+            "summary": _clean(item.get("description", "")),
+            "pub_date": item.get("pubDate", ""),
+        }
+        for item in resp.json().get("items") or []  # "items" may be missing or null
+    ]
+
+
+def collect_for_category(category: str,
+                         seed_keywords: List[str],
+                         per_keyword: Optional[int] = None) -> int:
+    """Search every seed keyword and hand each new article to the DB layer.
+
+    Links already seen in this call are skipped; rows that already exist are
+    expected to be rejected by the table's UNIQUE(link) constraint (defined
+    outside this module). A keyword whose search raises is logged and skipped.
+
+    Returns the number of articles passed to ``db.add_news_article`` (DB-side duplicates included).
+    """
+    if per_keyword is None:
+        # Split the per-category budget evenly across keywords, at least 1 each.
+        keyword_count = max(1, len(seed_keywords))
+        per_keyword = max(1, NEWS_PER_CATEGORY // keyword_count)
+    handled = set()
+    for keyword in seed_keywords:
+        try:
+            found = search_news(keyword, display=per_keyword)
+        except Exception as exc:
+            logger.warning("search_news failed kw=%s err=%s", keyword, exc)
+            continue
+        for article in found:
+            url = article["link"]
+            if url and url not in handled:
+                handled.add(url)
+                db.add_news_article({"category": category, "title": article["title"], "link": url,
+                                     "summary": article["summary"], "pub_date": article["pub_date"]})
+    return len(handled)  # every handled link was passed to the DB exactly once
diff --git a/insta-lab/tests/test_news_collector.py b/insta-lab/tests/test_news_collector.py
new file mode 100644
index 0000000..a582bac
--- /dev/null
+++ b/insta-lab/tests/test_news_collector.py
@@ -0,0 +1,89 @@
+from unittest.mock import patch, MagicMock
+import os
+import tempfile
+
+import pytest
+
+from app import db as db_module
+from app import news_collector
+
+
+@pytest.fixture
+def tmp_db(monkeypatch, tmp_path):
+    """Point ``db_module.DB_PATH`` at a fresh, initialised database for one test.
+
+    The file lives under pytest's per-test ``tmp_path`` directory, so pytest owns
+    its lifetime: nothing is leaked when ``init_db()`` raises before the yield,
+    and SQLite's ``-wal`` / ``-shm`` side files need no manual, Windows-sensitive
+    removal. Assumes ``init_db()`` creates the file itself — TODO confirm.
+
+    Yields the database path as a ``str``.
+    """
+    path = str(tmp_path / "news.db")
+    monkeypatch.setattr(db_module, "DB_PATH", path)
+    db_module.init_db()
+    yield path
+
+
+SAMPLE_RESPONSE = {  # canned news.json payload returned by the mocked requests.get
+ "items": [
+ {
+ "title": "금리 인상 단행",
+ "originallink": "https://news.example.com/1",  # search_news uses this only when "link" is empty
+ "link": "https://n.news.naver.com/article/1",
+ "description": "한국은행이 기준금리를 25bp 올렸다.",  # surfaces as "summary" after _clean
+ "pubDate": "Fri, 15 May 2026 08:00:00 +0900",  # copied verbatim into "pub_date"
+ },
+ {
+ "title": "환율 급등",
+ "originallink": "https://news.example.com/2",
+ "link": "https://n.news.naver.com/article/2",
+ "description": "원달러 환율이 1400원을 돌파했다.",
+ "pubDate": "Fri, 15 May 2026 09:00:00 +0900",
+ },
+ ],
+}
+
+
+def test_strip_html_and_decode_entities():
+    """_clean must drop tags, decode entities, and trim surrounding whitespace."""
+    assert news_collector._clean(' <b>&quot;테스트&quot;</b> &amp; 아이템 ') == '"테스트" & 아이템'
+
+
+def test_search_news_parses_items(tmp_db):
+    """search_news turns the canned payload into two cleaned item dicts."""
+    response = MagicMock(**{"json.return_value": SAMPLE_RESPONSE,
+                            "raise_for_status.return_value": None})
+    with patch.object(news_collector.requests, "get", return_value=response):
+        parsed = news_collector.search_news("금리", display=10)
+    assert len(parsed) == 2
+    first = parsed[0]
+    assert first["title"] == "금리 인상 단행" and first["summary"].startswith("한국은행")
+
+
+def test_collect_for_category_inserts(tmp_db):
+    """Collecting one keyword stores every returned article and reports the count."""
+    fake_resp = MagicMock()
+    fake_resp.json.return_value = SAMPLE_RESPONSE
+    with patch.object(news_collector.requests, "get", return_value=fake_resp):
+        attempted = news_collector.collect_for_category(
+            "economy", seed_keywords=["금리"], per_keyword=10
+        )
+    assert attempted == len(SAMPLE_RESPONSE["items"])  # return value counts insert attempts
+    rows = db_module.list_news_articles(category="economy", days=7)
+    assert {r["link"] for r in rows} == {item["link"] for item in SAMPLE_RESPONSE["items"]}
+
+
+def test_collect_dedupes_existing(tmp_db):
+    """An article whose link is already stored must not be inserted a second time."""
+    existing = {
+        "category": "economy", "title": "기존",
+        "link": "https://n.news.naver.com/article/1", "summary": "",
+    }
+    db_module.add_news_article(existing)
+    response = MagicMock(**{"json.return_value": SAMPLE_RESPONSE,
+                            "raise_for_status.return_value": None})
+    with patch.object(news_collector.requests, "get", return_value=response):
+        news_collector.collect_for_category("economy", seed_keywords=["금리"])
+    # The seeded row plus the one unseen link; the duplicate link adds nothing.
+    assert len(db_module.list_news_articles(category="economy", days=7)) == 2