feat(ai_news): pipeline uses articles_source (replaces Naver scraper)

2026-05-14 02:09:41 +09:00
parent ec0ccf649e
commit bbe5221e57
2 changed files with 97 additions and 71 deletions
--- a/stock-lab/tests/test_ai_news_pipeline.py
+++ b/stock-lab/tests/test_ai_news_pipeline.py
@@ -26,12 +26,15 @@ def conn():

@pytest.mark.asyncio
 async def test_refresh_daily_happy_path(conn):
-    """3종목 mini integration — 각 종목별로 scraper/analyzer mock."""
+    """3종목 mini integration — articles_source mock + analyzer mock."""
    asof = dt.date(2026, 5, 13)
-    fake_news = [{"title": "헤드라인"}]

-    async def fake_fetch(client, ticker, n):
-        return fake_news
+    fake_articles_by_ticker = {
+        "005930": [{"title": "삼성 뉴스", "summary": "", "press": "", "pub_date": ""}],
+        "000660": [{"title": "SK 뉴스", "summary": "", "press": "", "pub_date": ""}],
+        "373220": [{"title": "LG 뉴스", "summary": "", "press": "", "pub_date": ""}],
+    }
+    fake_stats = {"total_articles": 3, "matched_pairs": 3, "hit_tickers": 3}

    scores_by_ticker = {
        "005930": 7.5, "000660": 4.0, "373220": -6.0,
@@ -43,43 +46,40 @@ async def test_refresh_daily_happy_path(conn):
            "tokens_input": 100, "tokens_output": 20, "model": model,
        }

-    with patch.object(pipeline, "_scraper") as ms, \
+    with patch.object(pipeline, "articles_source") as mas, \
         patch.object(pipeline, "_analyzer") as ma, \
-         patch.object(pipeline, "_make_llm") as ml, \
-         patch.object(pipeline, "_make_http") as mh:
-        ms.fetch_news = fake_fetch
+         patch.object(pipeline, "_make_llm") as ml:
+        mas.gather_articles_for_tickers = MagicMock(
+            return_value=(fake_articles_by_ticker, fake_stats)
+        )
        ma.score_sentiment = fake_score
        ml.return_value.__aenter__.return_value = AsyncMock()
        ml.return_value.__aexit__.return_value = None
-        mh.return_value.__aenter__.return_value = AsyncMock()
-        mh.return_value.__aexit__.return_value = None
-        result = await pipeline.refresh_daily(conn, asof, concurrency=3, rate_limit_sec=0)
+        result = await pipeline.refresh_daily(conn, asof, concurrency=3)

    assert result["asof"] == "2026-05-13"
    assert result["updated"] == 3
    assert result["failures"] == []
-    assert len(result["top_pos"]) == 3
-    assert result["top_pos"][0]["ticker"] == "005930"  # 가장 큰 점수
-    assert result["top_neg"][0]["ticker"] == "373220"  # 가장 작은 점수
-    assert result["tokens_input"] == 300
-    assert result["tokens_output"] == 60
+    assert result["top_pos"][0]["ticker"] == "005930"
+    assert result["top_neg"][0]["ticker"] == "373220"
+    assert result["mapping"] == fake_stats

-    # DB upsert 확인
-    rows = conn.execute("SELECT ticker, score_raw FROM news_sentiment WHERE date=?",
-                        ("2026-05-13",)).fetchall()
+    rows = conn.execute("SELECT ticker, score_raw, source FROM news_sentiment "
+                        "WHERE date=?", ("2026-05-13",)).fetchall()
    assert len(rows) == 3
-    by_ticker = {r["ticker"]: r["score_raw"] for r in rows}
-    assert by_ticker["005930"] == 7.5
-    assert by_ticker["373220"] == -6.0
+    assert all(r["source"] == "articles" for r in rows)


@pytest.mark.asyncio
 async def test_refresh_daily_failures_isolated(conn):
-    """한 종목이 예외 던져도 나머지 종목은 정상 처리."""
    asof = dt.date(2026, 5, 13)

-    async def fake_fetch(client, ticker, n):
-        return [{"title": "h"}]
+    fake_articles_by_ticker = {
+        "005930": [{"title": "h", "summary": "", "press": "", "pub_date": ""}],
+        "000660": [{"title": "h", "summary": "", "press": "", "pub_date": ""}],
+        "373220": [{"title": "h", "summary": "", "press": "", "pub_date": ""}],
+    }
+    fake_stats = {"total_articles": 3, "matched_pairs": 3, "hit_tickers": 3}

    async def fake_score(llm, ticker, news, *, name=None, model="m"):
        if ticker == "000660":
@@ -89,22 +89,57 @@ async def test_refresh_daily_failures_isolated(conn):
            "tokens_input": 100, "tokens_output": 20, "model": model,
        }

-    with patch.object(pipeline, "_scraper") as ms, \
+    with patch.object(pipeline, "articles_source") as mas, \
         patch.object(pipeline, "_analyzer") as ma, \
-         patch.object(pipeline, "_make_llm") as ml, \
-         patch.object(pipeline, "_make_http") as mh:
-        ms.fetch_news = fake_fetch
+         patch.object(pipeline, "_make_llm") as ml:
+        mas.gather_articles_for_tickers = MagicMock(
+            return_value=(fake_articles_by_ticker, fake_stats)
+        )
        ma.score_sentiment = fake_score
        ml.return_value.__aenter__.return_value = AsyncMock()
        ml.return_value.__aexit__.return_value = None
-        mh.return_value.__aenter__.return_value = AsyncMock()
-        mh.return_value.__aexit__.return_value = None
-        result = await pipeline.refresh_daily(conn, asof, concurrency=3, rate_limit_sec=0)
+        result = await pipeline.refresh_daily(conn, asof, concurrency=3)

    assert result["updated"] == 2
    assert len(result["failures"]) == 1


+@pytest.mark.asyncio
+async def test_refresh_daily_no_match_ticker_skipped(conn):
+    """Tickers with zero mapped articles skip the LLM call and get no news_sentiment row."""
+    asof = dt.date(2026, 5, 13)
+
+    fake_articles_by_ticker = {
+        "005930": [{"title": "삼성", "summary": "", "press": "", "pub_date": ""}],
+        "000660": [],  # no mapped articles
+        "373220": [],  # no mapped articles
+    }
+    fake_stats = {"total_articles": 1, "matched_pairs": 1, "hit_tickers": 1}
+
+    async def fake_score(llm, ticker, news, *, name=None, model="m"):
+        return {
+            "ticker": ticker, "score_raw": 5.0, "reason": "r",
+            "news_count": 1, "tokens_input": 100, "tokens_output": 20,
+            "model": model,
+        }
+
+    with patch.object(pipeline, "articles_source") as mas, \
+         patch.object(pipeline, "_analyzer") as ma, \
+         patch.object(pipeline, "_make_llm") as ml:
+        mas.gather_articles_for_tickers = MagicMock(
+            return_value=(fake_articles_by_ticker, fake_stats)
+        )
+        ma.score_sentiment = fake_score
+        ml.return_value.__aenter__.return_value = AsyncMock()
+        ml.return_value.__aexit__.return_value = None
+        result = await pipeline.refresh_daily(conn, asof, concurrency=3)
+
+    assert result["updated"] == 1  # only the one ticker that has an article is counted
+    rows = conn.execute("SELECT ticker FROM news_sentiment "
+                        "WHERE date=?", ("2026-05-13",)).fetchall()
+    assert {r["ticker"] for r in rows} == {"005930"}  # the two empty tickers wrote no row
+
+
+
 def test_top_market_cap_tickers(conn):
    out = pipeline._top_market_cap_tickers(conn, n=2)
    assert out == ["005930", "000660"]