diff --git a/CHECK_POINT.md b/CHECK_POINT.md index 57738cb..c649c12 100644 --- a/CHECK_POINT.md +++ b/CHECK_POINT.md @@ -113,23 +113,26 @@ scheduler.add_job(_run_simulation_job, "cron", hour="0,4,8,12,16,20", minute=30) ## ๐ŸŸก ์ค‘๊ธฐ (1~2์ฃผ) -### 6. Chromium Browser Pool ์žฌ์„ค๊ณ„ (insta-lab) +### 6. Chromium Browser Pool ์žฌ์„ค๊ณ„ (insta-lab) โœ… 2026-05-18 - ๋งค๋ฒˆ launch X โ†’ 1๊ฐœ ์ธ์Šคํ„ด์Šค ์žฌ์‚ฌ์šฉ - ์นด๋“œ 10์žฅ ๋ Œ๋” ์‹œ๊ฐ„ 30% ๋‹จ์ถ• ๊ธฐ๋Œ€ -- [ ] `insta-lab/app/browser_pool.py` ์‹ ๊ทœ ๋ชจ๋“ˆ -- [ ] card_renderer์—์„œ ํ’€ ์‚ฌ์šฉ +- [x] `card_renderer.py` ๋‚ด๋ถ€์— ๋ชจ๋“ˆ ๋ ˆ๋ฒจ `_PLAYWRIGHT`/`_BROWSER` + `init_browser`/`shutdown_browser` ํ•จ์ˆ˜ (๋ณ„๋„ ๋ชจ๋“ˆ ๋ถ„๋ฆฌ ์•ˆ ํ•จ, ๊ฐ™์€ ํŒŒ์ผ์— ์ธ์ ‘ ๋ฐฐ์น˜) +- [x] `_render_slate_locked` ๋ณธ์ฒด์—์„œ `_get_browser()` ์žฌ์‚ฌ์šฉ (crashed ์‹œ lazy ์žฌ์ดˆ๊ธฐํ™”) +- [x] `main.py` startup hook์—์„œ `init_browser()`, shutdown hook์—์„œ `shutdown_browser()` -### 7. stock ๋‰ด์Šค ์Šคํฌ๋žฉ ๋น„๋™๊ธฐํ™” -- **ํŒŒ์ผ**: `stock/app/main.py:104-149` -- `run_scraping_job()` ๋™๊ธฐ โ†’ BackgroundTask ๋˜๋Š” asyncio -- 08:00 APScheduler ๋ธ”๋กœํ‚น ํ•ด์†Œ -- [ ] async/await ๋ณ€ํ™˜ +### 7. stock ๋‰ด์Šค ์Šคํฌ๋žฉ ๋น„๋™๊ธฐํ™” โ€” โš ๏ธ ๋ณด๋ฅ˜ 2026-05-18 +- **์žฌ์ง„๋‹จ**: stock์€ `BackgroundScheduler` ์‚ฌ์šฉ ์ค‘ โ†’ main loop ๋ธ”๋กœํ‚น ์—†์Œ (์ด๋ฏธ ๋ณ„๋„ thread) +- `fetch_market_news`์˜ 4๊ฐœ ๋™๊ธฐ `requests.get`์€ network I/O wait๋ผ CPU ๊ฑฐ์˜ ์‚ฌ์šฉ ์•ˆ ํ•จ +- `to_thread`๋กœ wrapํ•ด๋„ BackgroundScheduler ํ™˜๊ฒฝ์—์„œ ์‚ฌ์‹ค์ƒ ์˜๋ฏธ ์—†์Œ +- ์ง„์งœ ํšจ๊ณผ๋ฅผ ๋ณด๋ ค๋ฉด AsyncIOScheduler ์ „ํ™˜ + scraper.py 4๊ฐœ fetch๋ฅผ `aiohttp` ๋ณ‘๋ ฌ๋กœ โ€” **ํฐ ๋ฆฌํŒฉํ† ๋ง vs ํšจ๊ณผ ๋ถˆ๋ช…ํ™•** +- [ ] ๋ฐ•์žฌ์˜ค ํŒ๋‹จ: ํฐ ๋ฆฌํŒฉํ† ๋ง ์ง„ํ–‰ ์—ฌ๋ถ€ -### 8. realestate ์ˆ˜์ง‘ ๋ณ‘๋ ฌํ™” -- **ํŒŒ์ผ**: `realestate-lab/app/main.py:28-37` -- `collect_all()` + `delete_old()` ๋ณ‘๋ ฌ โ†’ `asyncio.gather` +### 8. 
realestate ์ˆ˜์ง‘ ๋ณ‘๋ ฌํ™” โœ… 2026-05-18 +- **ํŒŒ์ผ**: `realestate-lab/app/main.py:scheduled_collect` +- `collect_all()` + `delete_old_completed_announcements()` ๋ณ‘๋ ฌ +- BackgroundScheduler ํ™˜๊ฒฝ์ด๋ผ `asyncio.gather` ๋Œ€์‹  `ThreadPoolExecutor(max_workers=2)` ์‚ฌ์šฉ (ํšจ๊ณผ ๋™์ผ) - ๋งค์นญ์€ ์ˆœ์ฐจ ์œ ์ง€ (DB ์ผ๊ด€์„ฑ) -- [ ] async ํ•จ์ˆ˜ ๋ณ€ํ™˜ + gather ์ ์šฉ +- [x] ThreadPoolExecutor ์ ์šฉ ### 9. lotto Monte Carlo ์‹œ๋ฎฌ๋ ˆ์ด์…˜ ๋นˆ๋„ ๊ฒ€ํ†  - ํ˜„์žฌ 6ํšŒ/์ผ (00ยท04ยท08ยท12ยท16ยท20) @@ -173,6 +176,7 @@ services: - 2026-05-17: insta-lab minimal theme + design_importer ์ถ”๊ฐ€ - 2026-05-17: blog-lab ํŠธ๋ž™ ์™„์ „ ํ๊ธฐ (docker-compose์— ์—†์Œ, ์œ„ํ‚ค ์ •์ • ์™„๋ฃŒ) - 2026-05-18: ๐Ÿ”ด ์ฆ‰์‹œ 5๊ฑด ์ผ๊ด„ ์ ์šฉ โ€” 09:00 cron ์Šคํƒœ๊ฑฐ๋ง(insta/lotto/youtube/realestate), lotto Monte Carlo 08:30, insta-lab Semaphore(1), healthcheck 60s, uvicorn --workers 1 ๋ช…์‹œ (์‚ฌ์šฉ์ž push + NAS deployer ์žฌ๊ธฐ๋™ ๋Œ€๊ธฐ) +- 2026-05-18: ๐ŸŸก ์ค‘๊ธฐ 2๊ฑด ์ ์šฉ โ€” #6 insta-lab Chromium Browser Pool (lifecycle hook), #8 realestate ThreadPoolExecutor ๋ณ‘๋ ฌ (collect/delete). #7 stock async๋Š” BackgroundScheduler ์‚ฌ์šฉ ์ค‘์ด๋ผ ์žฌ์ง„๋‹จ ํ›„ ๋ณด๋ฅ˜ (ํšจ๊ณผ ๋ฏธ๋ฏธ). #9 Monte Carlo ๋นˆ๋„๋Š” ๋ฐ•์žฌ์˜ค ๊ฒฐ์ • ๋Œ€๊ธฐ. --- diff --git a/insta-lab/app/card_renderer.py b/insta-lab/app/card_renderer.py index 1547d45..52010ff 100644 --- a/insta-lab/app/card_renderer.py +++ b/insta-lab/app/card_renderer.py @@ -30,6 +30,47 @@ def _render_semaphore() -> asyncio.Semaphore: return _RENDER_SEMAPHORE +# Chromium ๋ธŒ๋ผ์šฐ์ € ํ’€ โ€” ๋งค ์Šฌ๋ ˆ์ดํŠธ๋งˆ๋‹ค launch ํ•˜์ง€ ์•Š๊ณ  1๊ฐœ๋ฅผ ์‚ด๋ ค๋‘ . +# (CHECK_POINT ์ค‘๊ธฐ-6) ์นด๋“œ 10์žฅ ๋ Œ๋” ์‹œ๊ฐ„ ~30% ๋‹จ์ถ• ๊ธฐ๋Œ€. +_PLAYWRIGHT = None +_BROWSER = None + + +async def init_browser() -> None: + """์•ฑ startup hook์—์„œ 1ํšŒ ํ˜ธ์ถœ. 
์ด๋ฏธ ์‚ด์•„์žˆ์œผ๋ฉด no-op.""" + global _PLAYWRIGHT, _BROWSER + if _BROWSER is not None and _BROWSER.is_connected(): + return + _PLAYWRIGHT = await async_playwright().start() + _BROWSER = await _PLAYWRIGHT.chromium.launch() + logger.info("Chromium browser pool ์ดˆ๊ธฐํ™” ์™„๋ฃŒ") + + +async def shutdown_browser() -> None: + """์•ฑ shutdown hook์—์„œ 1ํšŒ ํ˜ธ์ถœ.""" + global _PLAYWRIGHT, _BROWSER + if _BROWSER is not None: + try: + await _BROWSER.close() + except Exception: + logger.exception("browser close ์ค‘ ์˜ˆ์™ธ (๋ฌด์‹œ)") + _BROWSER = None + if _PLAYWRIGHT is not None: + try: + await _PLAYWRIGHT.stop() + except Exception: + logger.exception("playwright stop ์ค‘ ์˜ˆ์™ธ (๋ฌด์‹œ)") + _PLAYWRIGHT = None + + +async def _get_browser(): + """ํ˜„์žฌ ๋ธŒ๋ผ์šฐ์ € ํ•ธ๋“ค ๋ฐ˜ํ™˜. crashed/None์ด๋ฉด ์žฌ์ดˆ๊ธฐํ™” ํ›„ ๋ฐ˜ํ™˜.""" + global _BROWSER + if _BROWSER is None or not _BROWSER.is_connected(): + await init_browser() + return _BROWSER + + def _resolve_template_dir() -> str: """Prefer config CARD_TEMPLATE_DIR if it exists; else fall back to in-repo templates/.""" if os.path.isdir(CARD_TEMPLATE_DIR): @@ -97,29 +138,28 @@ async def _render_slate_locked(slate_id: int, template: str) -> List[str]: out_dir = _slate_dir(slate_id) paths: List[str] = [] - async with async_playwright() as p: - browser = await p.chromium.launch() - try: - ctx = await browser.new_context(viewport={"width": 1080, "height": 1350}) - page = await ctx.new_page() - for spec in pages: - html_str = tmpl.render(**spec) - with tempfile.NamedTemporaryFile("w", suffix=".html", delete=False, encoding="utf-8") as f: - f.write(html_str) - html_path = f.name + browser = await _get_browser() + ctx = await browser.new_context(viewport={"width": 1080, "height": 1350}) + try: + page = await ctx.new_page() + for spec in pages: + html_str = tmpl.render(**spec) + with tempfile.NamedTemporaryFile("w", suffix=".html", delete=False, encoding="utf-8") as f: + f.write(html_str) + html_path = f.name + try: + await 
page.goto(f"file://{html_path}", wait_until="networkidle") + out_path = os.path.join(out_dir, f"{spec['page_no']:02d}.png") + await page.screenshot(path=out_path, full_page=False, omit_background=False) + with open(out_path, "rb") as fp: + file_hash = hashlib.md5(fp.read()).hexdigest() + db.add_card_asset(slate_id, spec["page_no"], out_path, file_hash) + paths.append(out_path) + finally: try: - await page.goto(f"file://{html_path}", wait_until="networkidle") - out_path = os.path.join(out_dir, f"{spec['page_no']:02d}.png") - await page.screenshot(path=out_path, full_page=False, omit_background=False) - with open(out_path, "rb") as fp: - file_hash = hashlib.md5(fp.read()).hexdigest() - db.add_card_asset(slate_id, spec["page_no"], out_path, file_hash) - paths.append(out_path) - finally: - try: - os.unlink(html_path) - except OSError: - pass - finally: - await browser.close() + os.unlink(html_path) + except OSError: + pass + finally: + await ctx.close() return paths diff --git a/insta-lab/app/main.py b/insta-lab/app/main.py index 23ca0e3..5d09005 100644 --- a/insta-lab/app/main.py +++ b/insta-lab/app/main.py @@ -31,9 +31,16 @@ app.add_middleware( @app.on_event("startup") -def on_startup(): +async def on_startup(): os.makedirs(INSTA_DATA_PATH, exist_ok=True) db.init_db() + # Chromium browser pool 초기화 (CHECK_POINT 중기-6) + await card_renderer.init_browser() + + +@app.on_event("shutdown") +async def on_shutdown(): + await card_renderer.shutdown_browser() @app.get("/health") diff --git a/realestate-lab/app/main.py b/realestate-lab/app/main.py index 2a24790..50a7dac 100644 --- a/realestate-lab/app/main.py +++ b/realestate-lab/app/main.py @@ -1,6 +1,7 @@ import os import logging import threading +from concurrent.futures import ThreadPoolExecutor from contextlib import asynccontextmanager from fastapi import BackgroundTasks, FastAPI, Query, HTTPException from fastapi.middleware.cors import CORSMiddleware @@ -26,10 +27,19 @@ scheduler = 
BackgroundScheduler(timezone=os.getenv("TZ", "Asia/Seoul")) def scheduled_collect(): - """๋งค์ผ 09:00 โ€” ์ˆ˜์ง‘ + ์ •๋ฆฌ + ๋งค์นญ + ์•Œ๋ฆผ push""" + """๋งค์ผ 09:15 โ€” ์ˆ˜์ง‘ + ์ •๋ฆฌ (๋ณ‘๋ ฌ) โ†’ ๋งค์นญ โ†’ ์•Œ๋ฆผ push. + + collect_all๊ณผ delete_old_completed_announcements๋Š” ์„œ๋กœ ๋‹ค๋ฅธ ๋ฐ์ดํ„ฐ + ์˜์—ญ์„ ๊ฑด๋“œ๋ฆฌ๋ฏ€๋กœ thread ๋‘˜๋กœ ๋ณ‘๋ ฌํ™”. ๋งค์นญ์€ ๋‘ ์ž‘์—… ์™„๋ฃŒ ํ›„ ์ˆœ์ฐจ + ์‹คํ–‰ (DB ์ผ๊ด€์„ฑ). CHECK_POINT ์ค‘๊ธฐ-8 โ€” env์ด BackgroundScheduler+ + ๋™๊ธฐ ํ•จ์ˆ˜ ์กฐํ•ฉ์ด๋ผ asyncio.gather ๋Œ€์‹  ThreadPoolExecutor ์‚ฌ์šฉ. + """ logger.info("์Šค์ผ€์ค„ ์ˆ˜์ง‘ ์‹œ์ž‘") - collect_all() - deleted = delete_old_completed_announcements(grace_days=90) + with ThreadPoolExecutor(max_workers=2) as ex: + collect_future = ex.submit(collect_all) + delete_future = ex.submit(delete_old_completed_announcements, 90) + collect_future.result() + deleted = delete_future.result() if deleted: logger.info("์ •๋ฆฌ: %d๊ฑด ์‚ญ์ œ", deleted) run_matching()