"""web_crawler 모듈 테스트.""" import pytest from unittest.mock import patch, AsyncMock from app.web_crawler import crawl_blog_content, enrich_top_blogs, _parse_naver_blog_url def test_parse_naver_blog_url_valid(): """blog.naver.com URL에서 blogId와 logNo를 올바르게 파싱.""" result = _parse_naver_blog_url("https://blog.naver.com/testuser/123456") assert result == ("testuser", "123456") def test_parse_returns_none_for_invalid_url(): """잘못된 URL은 None 반환.""" result = _parse_naver_blog_url("https://example.com/post") assert result is None @pytest.mark.asyncio async def test_crawl_returns_empty_on_non_naver_url(): """네이버 블로그가 아닌 URL은 빈 문자열 반환.""" result = await crawl_blog_content("https://example.com/post") assert result == "" @pytest.mark.asyncio async def test_crawl_truncates_to_2000_chars(): """본문이 2000자를 초과하면 잘라낸다.""" long_html = f'

{"가" * 3000}

' with patch("app.web_crawler._fetch_html", new_callable=AsyncMock, return_value=long_html): result = await crawl_blog_content("https://blog.naver.com/testuser/123") assert len(result) <= 2000 @pytest.mark.asyncio async def test_crawl_returns_empty_on_fetch_failure(): """HTTP 요청 실패 시 빈 문자열 반환.""" with patch("app.web_crawler._fetch_html", new_callable=AsyncMock, side_effect=Exception("timeout")): result = await crawl_blog_content("https://blog.naver.com/testuser/123") assert result == "" @pytest.mark.asyncio async def test_enrich_top_blogs_adds_content_field(): """enrich_top_blogs가 각 블로그에 content 필드를 추가.""" blogs = [ {"title": "테스트", "link": "https://blog.naver.com/user1/111", "bloggername": "유저1", "description": "설명"}, {"title": "테스트2", "link": "https://blog.naver.com/user2/222", "bloggername": "유저2", "description": "설명2"}, ] with patch("app.web_crawler.crawl_blog_content", new_callable=AsyncMock, return_value="크롤링된 본문"): result = await enrich_top_blogs(blogs) assert len(result) == 2 assert result[0]["content"] == "크롤링된 본문" assert result[1]["content"] == "크롤링된 본문" @pytest.mark.asyncio async def test_enrich_top_blogs_handles_partial_failure(): """일부 크롤링 실패 시에도 나머지는 정상 처리.""" blogs = [ {"title": "성공", "link": "https://blog.naver.com/user1/111"}, {"title": "실패", "link": "https://blog.naver.com/user2/222"}, ] side_effects = ["성공 본문", Exception("fail")] with patch("app.web_crawler.crawl_blog_content", new_callable=AsyncMock, side_effect=side_effects): result = await enrich_top_blogs(blogs) assert result[0]["content"] == "성공 본문" assert result[1]["content"] == ""