"""web_crawler 모듈 테스트."""
import pytest
from unittest.mock import patch, AsyncMock
from app.web_crawler import crawl_blog_content, enrich_top_blogs, _parse_naver_blog_url
def test_parse_naver_blog_url_valid():
    """Parse the blogId and logNo correctly from a blog.naver.com URL."""
    post_url = "https://blog.naver.com/testuser/123456"
    expected = ("testuser", "123456")
    assert _parse_naver_blog_url(post_url) == expected
def test_parse_returns_none_for_invalid_url():
    """An invalid URL yields None."""
    non_blog_url = "https://example.com/post"
    assert _parse_naver_blog_url(non_blog_url) is None
@pytest.mark.asyncio
async def test_crawl_returns_empty_on_non_naver_url():
    """A URL that is not a Naver blog yields an empty string."""
    foreign_url = "https://example.com/post"
    content = await crawl_blog_content(foreign_url)
    assert content == ""
@pytest.mark.asyncio
async def test_crawl_truncates_to_2000_chars():
    """Body text longer than 2000 characters is cut to at most 2000.

    ``_fetch_html`` is patched so no network request is made; the crawler
    is handed a synthetic page whose body text is 3000 characters long.
    """
    # NOTE(review): the original HTML literal was lost (its markup was
    # stripped, leaving an unterminated f-string that did not even parse).
    # The container class used below is an assumption about what
    # crawl_blog_content extracts -- confirm it against the selectors in
    # app.web_crawler so that the result is non-empty and the length check
    # is meaningful.
    body_text = "가" * 3000
    long_html = f'<div class="se-main-container"><p>{body_text}</p></div>'
    with patch(
        "app.web_crawler._fetch_html",
        new_callable=AsyncMock,
        return_value=long_html,
    ):
        result = await crawl_blog_content("https://blog.naver.com/testuser/123")
    assert len(result) <= 2000
@pytest.mark.asyncio
async def test_crawl_returns_empty_on_fetch_failure():
    """A failing HTTP request yields an empty string."""
    # The patched fetcher raises instead of returning HTML.
    fetch_error = Exception("timeout")
    failing_fetch = patch(
        "app.web_crawler._fetch_html",
        new_callable=AsyncMock,
        side_effect=fetch_error,
    )
    with failing_fetch:
        content = await crawl_blog_content("https://blog.naver.com/testuser/123")
    assert content == ""
@pytest.mark.asyncio
async def test_enrich_top_blogs_adds_content_field():
    """enrich_top_blogs adds a content field to each blog entry."""
    crawled_text = "크롤링된 본문"
    entries = [
        {
            "title": "테스트",
            "link": "https://blog.naver.com/user1/111",
            "bloggername": "유저1",
            "description": "설명",
        },
        {
            "title": "테스트2",
            "link": "https://blog.naver.com/user2/222",
            "bloggername": "유저2",
            "description": "설명2",
        },
    ]
    # Every crawl is replaced by a mock returning the same fixed text.
    with patch(
        "app.web_crawler.crawl_blog_content",
        new_callable=AsyncMock,
        return_value=crawled_text,
    ):
        enriched = await enrich_top_blogs(entries)
    assert len(enriched) == 2
    assert enriched[0]["content"] == crawled_text
    assert enriched[1]["content"] == crawled_text
@pytest.mark.asyncio
async def test_enrich_top_blogs_handles_partial_failure():
    """When some crawls fail, the remaining entries are still processed."""
    entries = [
        {"title": "성공", "link": "https://blog.naver.com/user1/111"},
        {"title": "실패", "link": "https://blog.naver.com/user2/222"},
    ]
    # Successive mock calls consume this list: the first returns text,
    # the second raises the exception instance.
    outcomes = ["성공 본문", Exception("fail")]
    crawl_patch = patch(
        "app.web_crawler.crawl_blog_content",
        new_callable=AsyncMock,
        side_effect=outcomes,
    )
    with crawl_patch:
        enriched = await enrich_top_blogs(entries)
    assert enriched[0]["content"] == "성공 본문"
    assert enriched[1]["content"] == ""