feat(realestate-collector): 30-day window + district extraction + completed skip

- Add _extract_district() helper with DISTRICT_PATTERN regex (Seoul only)
- collect_all() now passes RCRIT_PBLANC_DE_FROM param (30-day window) to all detail endpoints
- collect_all() skips announcements where compute_status() returns '완료'
- collect_all() stamps district on each parsed announcement before upsert
- upsert_announcement(): add district to INSERT/VALUES/ON CONFLICT UPDATE; default it to None via data.setdefault('district', None) when the caller omits it (e.g. manual registration)
- ANNOUNCEMENT_COLUMNS: add 'district' (closes deferred gap from Task 2 review)
- 9 new tests in realestate-lab/tests/test_collector.py (6 unit + 3 integration)
- Full suite: 22 passed

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-28 08:28:10 +09:00
parent 496e3a6a73
commit 9dd517e82a
3 changed files with 178 additions and 9 deletions

View File

@@ -194,6 +194,7 @@ def _ann_row_to_dict(r) -> Dict[str, Any]:
def upsert_announcement(data: Dict[str, Any]) -> tuple:
"""공고 upsert — house_manage_no + pblanc_no 기준. Returns (dict, is_new: bool)."""
data.setdefault("district", None) # 수동 등록 등에서 누락 시 안전 처리
status = compute_status(
data.get("receipt_start", ""),
data.get("receipt_end", ""),
@@ -208,7 +209,7 @@ def upsert_announcement(data: Dict[str, Any]) -> tuple:
conn.execute("""
INSERT INTO announcements (
house_manage_no, pblanc_no, house_nm, house_secd, house_dtl_secd,
rent_secd, region_code, region_name, address, total_units,
rent_secd, region_code, region_name, district, address, total_units,
rcrit_date, receipt_start, receipt_end, spsply_start, spsply_end,
gnrl_rank1_start, gnrl_rank1_end, winner_date, contract_start,
contract_end, homepage_url, pblanc_url, constructor, developer,
@@ -216,7 +217,7 @@ def upsert_announcement(data: Dict[str, Any]) -> tuple:
status, source
) VALUES (
:house_manage_no, :pblanc_no, :house_nm, :house_secd, :house_dtl_secd,
:rent_secd, :region_code, :region_name, :address, :total_units,
:rent_secd, :region_code, :region_name, :district, :address, :total_units,
:rcrit_date, :receipt_start, :receipt_end, :spsply_start, :spsply_end,
:gnrl_rank1_start, :gnrl_rank1_end, :winner_date, :contract_start,
:contract_end, :homepage_url, :pblanc_url, :constructor, :developer,
@@ -230,6 +231,7 @@ def upsert_announcement(data: Dict[str, Any]) -> tuple:
rent_secd=excluded.rent_secd,
region_code=excluded.region_code,
region_name=excluded.region_name,
district=excluded.district,
address=excluded.address,
total_units=excluded.total_units,
rcrit_date=excluded.rcrit_date,
@@ -368,7 +370,7 @@ def create_announcement(data: Dict[str, Any]) -> Dict[str, Any]:
ANNOUNCEMENT_COLUMNS = {
"house_nm", "house_secd", "house_dtl_secd", "rent_secd",
"region_code", "region_name", "address", "total_units",
"region_code", "region_name", "district", "address", "total_units",
"rcrit_date", "receipt_start", "receipt_end", "spsply_start", "spsply_end",
"gnrl_rank1_start", "gnrl_rank1_end", "winner_date",
"contract_start", "contract_end", "homepage_url", "pblanc_url",