Source code for disclosure_alpha.edgar.client
from __future__ import annotations
import json
import os
import threading
import time
import urllib.error
import urllib.request
from functools import lru_cache
from typing import Any
from disclosure_alpha.edgar.types import SecFetchError
# Base URL of the main SEC site; used in this module for the company ticker
# file and for filing archive document URLs.
SEC_BASE = "https://www.sec.gov"
# Base URL of the SEC data host; used in this module for the per-company
# submissions JSON.
DATA_BASE = "https://data.sec.gov"
# Minimum spacing, in seconds, that _throttle() enforces between requests.
_MIN_INTERVAL = 0.11 # ponytail: ~9 req/s global lock; fine for single-user self-host
# time.monotonic() reading recorded by _throttle() for the most recently
# admitted request; 0.0 until the first request is made.
_last_request_at = 0.0
# Held by _throttle() while it reads and updates _last_request_at.
_throttle_lock = threading.Lock()
def _user_agent() -> str:
    """Return the User-Agent string configured via ``SEC_USER_AGENT``.

    The environment value is stripped of surrounding whitespace.

    Raises:
        SecFetchError: If the variable is unset or blank after stripping.
    """
    configured = os.environ.get("SEC_USER_AGENT", "").strip()
    if configured:
        return configured
    raise SecFetchError(
        "SEC_USER_AGENT env var required (e.g. 'YourName your@email.com')"
    )
def _throttle() -> None:
    """Block until at least ``_MIN_INTERVAL`` seconds separate two requests.

    The wait happens while ``_throttle_lock`` is held, so concurrent callers
    queue behind one another and are released one interval apart.
    """
    global _last_request_at
    with _throttle_lock:
        remaining = _MIN_INTERVAL - (time.monotonic() - _last_request_at)
        if remaining > 0:
            time.sleep(remaining)
        # Stamp after sleeping so the next caller measures from the moment
        # this request was actually let through.
        _last_request_at = time.monotonic()
[docs]
def fetch_json(url: str) -> Any:
    """Fetch *url* and return its body parsed as JSON.

    The call is rate-limited through ``_throttle()`` and sent with the
    configured User-Agent and an ``Accept: application/json`` header, using a
    60-second timeout.

    Args:
        url: Absolute URL to request.

    Returns:
        The decoded JSON value (whatever ``json.loads`` produces).

    Raises:
        SecFetchError: On an HTTP error status, a connection failure, a
            socket-level error while reading the body, or a body that is not
            valid UTF-8 JSON. Also raised by ``_user_agent()`` when
            ``SEC_USER_AGENT`` is not set.
    """
    _throttle()
    req = urllib.request.Request(
        url,
        headers={"User-Agent": _user_agent(), "Accept": "application/json"},
    )
    try:
        with urllib.request.urlopen(req, timeout=60) as resp:
            body = resp.read()
    except urllib.error.HTTPError as exc:
        raise SecFetchError(f"SEC HTTP {exc.code} for {url}") from exc
    except urllib.error.URLError as exc:
        raise SecFetchError(f"SEC fetch failed for {url}: {exc.reason}") from exc
    except OSError as exc:
        # Timeouts or resets while reading the response are raised as plain
        # OSError subclasses rather than URLError; report them the same way.
        raise SecFetchError(f"SEC fetch failed for {url}: {exc}") from exc
    try:
        return json.loads(body.decode("utf-8"))
    except ValueError as exc:
        # Covers both UnicodeDecodeError and json.JSONDecodeError, e.g. when
        # the server answers with an HTML page instead of JSON.
        raise SecFetchError(f"SEC returned invalid JSON for {url}: {exc}") from exc
[docs]
def fetch_text(url: str) -> str:
    """Fetch *url* and return its body as text.

    The call is rate-limited through ``_throttle()`` and sent with the
    configured User-Agent and an ``Accept: text/html,*/*`` header, using a
    120-second timeout. The body is decoded as UTF-8 with undecodable bytes
    replaced, so decoding itself never fails.

    Args:
        url: Absolute URL to request.

    Returns:
        The response body decoded to ``str``.

    Raises:
        SecFetchError: On an HTTP error status, a connection failure, or a
            socket-level error while reading the body. Also raised by
            ``_user_agent()`` when ``SEC_USER_AGENT`` is not set.
    """
    _throttle()
    req = urllib.request.Request(
        url,
        headers={"User-Agent": _user_agent(), "Accept": "text/html,*/*"},
    )
    try:
        with urllib.request.urlopen(req, timeout=120) as resp:
            return resp.read().decode("utf-8", errors="replace")
    except urllib.error.HTTPError as exc:
        raise SecFetchError(f"SEC HTTP {exc.code} for {url}") from exc
    except urllib.error.URLError as exc:
        raise SecFetchError(f"SEC fetch failed for {url}: {exc.reason}") from exc
    except OSError as exc:
        # Timeouts or resets while reading the response are raised as plain
        # OSError subclasses rather than URLError; report them the same way.
        raise SecFetchError(f"SEC fetch failed for {url}: {exc}") from exc
[docs]
@lru_cache(maxsize=1)
def fetch_company_tickers() -> dict[str, tuple[str, str]]:
    """Download the company ticker file and index it by ticker symbol.

    Returns:
        A mapping of upper-cased ticker to ``(cik, title)``, where ``cik`` is
        the entry's ``cik_str`` left-padded with zeros to 10 characters and
        ``title`` is the entry's ``title`` field (empty string when absent).

    The result is memoised by ``lru_cache``, so every caller receives the same
    dict object after the first successful call.
    """
    payload = fetch_json(f"{SEC_BASE}/files/company_tickers.json")
    return {
        str(record["ticker"]).upper(): (
            str(record["cik_str"]).zfill(10),
            str(record.get("title", "")),
        )
        for record in payload.values()
    }
[docs]
def fetch_submissions(cik: str) -> dict[str, Any]:
    """Fetch the submissions JSON document for the company with *cik*.

    Args:
        cik: CIK as a string; it is left-padded with zeros to 10 characters
            before being placed in the URL.

    Returns:
        Whatever ``fetch_json`` returns for the submissions URL.
    """
    url = f"{DATA_BASE}/submissions/CIK{cik.zfill(10)}.json"
    return fetch_json(url)
[docs]
def filing_document_url(cik: str, accession_number: str, primary_document: str) -> str:
    """Build the archive URL of a filing's primary document.

    Args:
        cik: CIK string; leading zeros are removed (an all-zero or empty
            value becomes ``"0"``).
        accession_number: Accession number; every ``-`` is removed.
        primary_document: Document file name appended as the last path part.

    Returns:
        ``{SEC_BASE}/Archives/edgar/data/<cik>/<accession>/<primary_document>``.
    """
    bare_cik = cik.lstrip("0")
    if not bare_cik:
        # Nothing left after stripping zeros: fall back to a single "0".
        bare_cik = "0"
    accession_path = "".join(accession_number.split("-"))
    return f"{SEC_BASE}/Archives/edgar/data/{bare_cik}/{accession_path}/{primary_document}"