Files
web-page-backend/stock/app/screener/snapshot.py
gahusb ace0339d33 refactor: rename stock-lab → stock (graduation)
- git mv stock-lab/ → stock/
- docker-compose.yml: 서비스 키 + container_name + build.context +
  frontend.depends_on + agent-office STOCK_LAB_URL → STOCK_URL
- agent-office/app: config.py, service_proxy.py, agents/stock.py, tests/
  STOCK_LAB_URL → STOCK_URL
- nginx/default.conf: proxy_pass http://stock-lab → http://stock (3 lines)
- CLAUDE.md / README.md / STATUS.md / scripts/ 문구 갱신
- stock/ 내부 자기 참조 갱신

lab 네이밍 정책 (feedback_lab_naming.md) graduation.
API URL / Python import / DB 파일명 변경 없음.
2026-05-15 01:45:44 +09:00

251 lines
8.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""KRX daily snapshot loader (FDR + naver finance scraping)."""
from __future__ import annotations
import datetime as dt
import logging
import re
import sqlite3
import time
from dataclasses import dataclass
import FinanceDataReader as fdr
import httpx
import pandas as pd
from bs4 import BeautifulSoup
log = logging.getLogger(__name__)

# Naver Finance per-ticker foreign/institution trading page (paginated).
NAVER_FRGN_URL = "https://finance.naver.com/item/frgn.naver"
# Browser-like headers; Naver rejects requests without a plausible UA/Referer.
NAVER_HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
    "Referer": "https://finance.naver.com/",
}
DEFAULT_FLOW_TOP_N = 100
DEFAULT_RATE_LIMIT_SEC = 0.2
# Top 100 by market cap x 0.2 s ~= 20 s -- finishes comfortably inside the
# agent-office httpx timeout (180 s).  Foreign-buying signals are most
# meaningful for large caps, so the top 100 names are sufficient; if more
# coverage is ever needed, split it out into a separate cron job.
@dataclass
class RefreshSummary:
    """Outcome of one daily refresh run, serialisable via :meth:`asdict`."""

    asof: dt.date
    master_count: int
    prices_count: int
    flow_count: int
    failures: list[str]

    def asdict(self) -> dict:
        """Return a JSON-friendly dict (``asof`` as an ISO date string)."""
        payload = {"asof": self.asof.isoformat()}
        payload.update(
            master_count=self.master_count,
            prices_count=self.prices_count,
            flow_count=self.flow_count,
            failures=self.failures,
        )
        return payload
def _iso(d: dt.date) -> str:
return d.isoformat()
def _is_preferred(name: str) -> int:
"""우선주 휴리스틱: 종목명이 ''로 끝나거나 '우[A-Z]?'/'\\d?' 패턴."""
n = name or ""
return 1 if re.search(r"우[A-Z]?$|우\d?$", n) else 0
def _is_spac(name: str) -> int:
return 1 if "스팩" in (name or "") else 0
def fetch_master_listing() -> pd.DataFrame:
    """Return the full KRX listing frame via ``fdr.StockListing('KRX')``.

    Thin wrapper kept separate so tests can stub it out instead of
    hitting the network.
    """
    return fdr.StockListing("KRX")
def fetch_ohlcv_for_ticker(ticker: str, start: str, end: str) -> pd.DataFrame:
    """Daily OHLCV frame via ``fdr.DataReader`` for the backfill path.

    *start*/*end* are ISO date strings.  Wrapped for stub-ability in tests.
    """
    return fdr.DataReader(ticker, start, end)
def fetch_flow_naver(ticker: str, *, client) -> dict | None:
    """Scrape the Naver frgn page for *ticker*'s latest daily flow.

    Returns a dict with ``date`` (YYYY-MM-DD), ``foreign_net`` and
    ``institution_net`` for the most recent trading day shown, or ``None``
    when the page is unavailable or its first data row cannot be parsed.
    """
    resp = client.get(NAVER_FRGN_URL, params={"code": ticker, "page": 1})
    if resp.status_code != 200:
        return None

    doc = BeautifulSoup(resp.text, "lxml")
    date_pat = re.compile(r"\d{4}\.\d{2}\.\d{2}")
    for tr in doc.select("table.type2 tr"):
        cols = [td.get_text(strip=True).replace(",", "") for td in tr.select("td")]
        # Skip header/spacer rows and rows whose first cell is not a date.
        if not cols or not cols[0] or not date_pat.match(cols[0]):
            continue
        try:
            institution = 0 if cols[5] in ("", "-") else int(cols[5])
            foreigner = 0 if cols[6] in ("", "-") else int(cols[6])
        except (IndexError, ValueError):
            # The newest data row is malformed -- bail out rather than
            # fall through to an older (stale) row.
            return None
        return {
            "date": cols[0].replace(".", "-"),
            "foreign_net": foreigner,
            "institution_net": institution,
        }
    return None
def _master_and_prices_rows(asof: dt.date,
                            df: pd.DataFrame) -> tuple[list[tuple], list[tuple]]:
    """Convert an FDR KRX listing frame into DB row tuples.

    Returns ``(master_rows, price_rows)``:

    * master_rows -- (ticker, name, market, market_cap, is_managed,
      is_preferred, is_spac, listed_date, updated_at) for ``krx_master``.
      ``is_managed`` is always 0 and ``listed_date`` always None here;
      neither is derivable from the listing frame.
    * price_rows -- (ticker, date, open, high, low, close, volume, value)
      for ``krx_daily_prices``; rows without close or volume are skipped.
    """
    iso = _iso(asof)
    # Naive UTC timestamp in ISO form.  datetime.utcnow() is deprecated
    # (Python 3.12+), so derive the same naive value from an aware "now".
    now_iso = dt.datetime.now(dt.timezone.utc).replace(tzinfo=None).isoformat()
    master_rows: list[tuple] = []
    price_rows: list[tuple] = []
    for _, row in df.iterrows():
        ticker = str(row.get("Code") or "").strip()
        name = str(row.get("Name") or "").strip()
        if not ticker or not name:
            continue  # listing rows without code/name are unusable
        market_raw = str(row.get("Market") or "").upper()
        # FDR reports variants like "KOSDAQ GLOBAL"; anything non-KOSDAQ
        # collapses to KOSPI.
        market = "KOSDAQ" if "KOSDAQ" in market_raw else "KOSPI"
        try:
            market_cap = int(row["Marcap"]) if pd.notna(row.get("Marcap")) else None
        except (TypeError, ValueError):
            market_cap = None
        master_rows.append((
            ticker, name, market, market_cap,
            0, _is_preferred(name), _is_spac(name),
            None, now_iso,
        ))
        try:
            o = int(row["Open"]) if pd.notna(row.get("Open")) else None
            h = int(row["High"]) if pd.notna(row.get("High")) else None
            l = int(row["Low"]) if pd.notna(row.get("Low")) else None
            c = int(row["Close"]) if pd.notna(row.get("Close")) else None
            v = int(row["Volume"]) if pd.notna(row.get("Volume")) else None
            amt = row.get("Amount")
            a = int(amt) if pd.notna(amt) else None
            if c is not None and v is not None:
                price_rows.append((ticker, iso, o, h, l, c, v, a))
        except (TypeError, ValueError, KeyError):
            # ValueError added: a stray non-numeric cell (which still passes
            # pd.notna) previously escaped this handler and aborted the
            # whole snapshot.  Drop the price row, keep the master row.
            pass
    return master_rows, price_rows
def _gather_flow_naver(asof: dt.date, tickers: list[str],
*, rate_limit_sec: float = DEFAULT_RATE_LIMIT_SEC) -> list[tuple]:
iso = _iso(asof)
rows: list[tuple] = []
if not tickers:
return rows
with httpx.Client(timeout=10, headers=NAVER_HEADERS) as client:
for t in tickers:
try:
data = fetch_flow_naver(t, client=client)
if data and data["date"] == iso:
rows.append((t, iso, data["foreign_net"], data["institution_net"]))
except Exception as e:
log.warning("flow scrape failed for %s: %s", t, e)
if rate_limit_sec > 0:
time.sleep(rate_limit_sec)
return rows
def refresh_daily(conn: sqlite3.Connection, asof: dt.date,
                  flow_top_n: int = DEFAULT_FLOW_TOP_N,
                  rate_limit_sec: float = DEFAULT_RATE_LIMIT_SEC) -> dict:
    """Pull master + prices (FDR) + flow (naver scraping for top N by market cap).

    Upserts krx_master / krx_daily_prices / krx_flow and commits once at
    the end.  Returns a RefreshSummary as a plain dict; note ``failures``
    is always [] here -- per-ticker scrape failures are only logged inside
    _gather_flow_naver, not propagated.  Pass ``flow_top_n=0`` to skip the
    flow scrape entirely.
    """
    df = fetch_master_listing()
    master_rows, price_rows = _master_and_prices_rows(asof, df)
    # Full upsert: refresh every mutable column except listed_date (not
    # available from the listing frame, so the stored value is preserved).
    conn.executemany("""
        INSERT INTO krx_master (
            ticker, name, market, market_cap,
            is_managed, is_preferred, is_spac,
            listed_date, updated_at
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
        ON CONFLICT(ticker) DO UPDATE SET
            name=excluded.name, market=excluded.market,
            market_cap=excluded.market_cap,
            is_managed=excluded.is_managed,
            is_preferred=excluded.is_preferred,
            is_spac=excluded.is_spac,
            updated_at=excluded.updated_at
    """, master_rows)
    conn.executemany("""
        INSERT OR REPLACE INTO krx_daily_prices
        (ticker, date, open, high, low, close, volume, value)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?)
    """, price_rows)
    # Foreign/institution flow: top N by market cap only (rate-limit
    # protection against Naver).  r[3] is market_cap, which may be None.
    if flow_top_n > 0:
        top = sorted(master_rows, key=lambda r: r[3] or 0, reverse=True)[:flow_top_n]
        flow_tickers = [r[0] for r in top]
    else:
        flow_tickers = []
    flow_rows = _gather_flow_naver(asof, flow_tickers, rate_limit_sec=rate_limit_sec)
    conn.executemany("""
        INSERT OR REPLACE INTO krx_flow
        (ticker, date, foreign_net, institution_net)
        VALUES (?, ?, ?, ?)
    """, flow_rows)
    conn.commit()
    return RefreshSummary(
        asof=asof, master_count=len(master_rows),
        prices_count=len(price_rows), flow_count=len(flow_rows),
        failures=[],
    ).asdict()
def backfill(conn: sqlite3.Connection, start: dt.date, end: dt.date) -> list[dict]:
    """Backfill ~5 years of daily candles -- one fdr.DataReader call per ticker.

    Master is taken as of *end* (FDR does not expose historical listings).
    Returns one dict per ticker: {"ticker", "count"} on success or
    {"ticker", "error"} on failure; a single failing ticker does not abort
    the run.  Commits once at the end.
    """
    df = fetch_master_listing()
    master_rows, _ = _master_and_prices_rows(end, df)
    # Minimal upsert: for already-known tickers only refresh the name.
    conn.executemany("""
        INSERT INTO krx_master (
            ticker, name, market, market_cap,
            is_managed, is_preferred, is_spac,
            listed_date, updated_at
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
        ON CONFLICT(ticker) DO UPDATE SET name=excluded.name
    """, master_rows)
    iso_start = start.isoformat()
    iso_end = end.isoformat()
    results = []
    for r in master_rows:
        t = r[0]  # ticker is the first element of a master row
        try:
            ddf = fetch_ohlcv_for_ticker(t, iso_start, iso_end)
            if ddf is None or ddf.empty:
                continue
            # DataReader indexes by date; flatten to a Date column as text.
            ddf = ddf.reset_index()
            ddf["Date"] = pd.to_datetime(ddf["Date"]).dt.strftime("%Y-%m-%d")
            rows = []
            for _, rr in ddf.iterrows():
                if pd.isna(rr["Close"]) or pd.isna(rr["Volume"]):
                    continue
                rows.append((
                    t, rr["Date"],
                    int(rr["Open"]) if pd.notna(rr["Open"]) else None,
                    int(rr["High"]) if pd.notna(rr["High"]) else None,
                    int(rr["Low"]) if pd.notna(rr["Low"]) else None,
                    int(rr["Close"]),
                    int(rr["Volume"]),
                    # NOTE(review): traded value approximated as
                    # close * volume -- presumably the DataReader frame has
                    # no Amount column here; confirm against FDR output.
                    int(rr["Close"] * rr["Volume"]),
                ))
            conn.executemany("""
                INSERT OR REPLACE INTO krx_daily_prices
                (ticker, date, open, high, low, close, volume, value)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?)
            """, rows)
            results.append({"ticker": t, "count": len(rows)})
        except Exception as e:
            log.error("backfill failed for %s: %s", t, e)
            results.append({"ticker": t, "error": str(e)})
    conn.commit()
    return results