bifitnex-trading/backtest/data_loader.py
kroutony 32aa6e40cd Add non-price context filters to backtest (BTC trend, buy pressure, funding)
V3 backtest: add context parameter to signal_generator with three new filters:
- BTC trend filter: skip altcoin BUYs when BTC 1h EMA9<EMA21 + ADX>20
- Buy pressure (OHLCV proxy): penalize BUY score when close near low, boost SELL
- Funding sentiment (BTC perp basis): penalize BUY on overleveraged longs, boost SELL

Results: return -19.07% → -13.48%, max DD -27.19% → -18.25%, BUYs 385 → 189.
Added --no-context CLI flag for A/B comparison.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-17 16:54:22 +00:00

144 lines
5.3 KiB
Python

"""Fetch and cache historical candle data from Bitfinex public API."""
import logging
import os
import time
import pandas as pd
import requests
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Directory for cached candle CSV files: ../cache/backtest relative to this file.
CACHE_DIR = os.path.join(os.path.dirname(__file__), "..", "cache", "backtest")
# Base URL of the Bitfinex public REST API used for candle requests.
BFX_BASE_URL = "https://api-pub.bitfinex.com"
# Default value of the `limit` query parameter sent with each candles request.
MAX_CANDLES_PER_REQUEST = 10000
def _fetch_candles_page(symbol: str, timeframe: str, start_ms: int, end_ms: int, limit: int = MAX_CANDLES_PER_REQUEST) -> list:
    """Fetch a single page of candles from the Bitfinex candles ``hist`` endpoint.

    Args:
        symbol: Bitfinex symbol, e.g. ``"tBTCUSD"``.
        timeframe: Candle timeframe string, e.g. ``"5m"`` or ``"1h"``.
        start_ms: Start of the requested window, in epoch milliseconds.
        end_ms: End of the requested window, in epoch milliseconds.
        limit: Maximum number of candles requested for this page.

    Returns:
        The decoded JSON body of the response (a list of candle rows).

    Raises:
        requests.HTTPError: On any non-success status, including HTTP 429
            once all attempts have been used up.
    """
    url = f"{BFX_BASE_URL}/v2/candles/trade:{timeframe}:{symbol}/hist"
    # sort=1 asks the API for oldest-first ordering; the paginating caller
    # relies on the last row carrying the newest timestamp.
    params = {"start": start_ms, "end": end_ms, "limit": limit, "sort": 1}
    max_attempts = 5
    for attempt in range(max_attempts):
        resp = requests.get(url, params=params, timeout=30)
        is_last_attempt = attempt == max_attempts - 1
        if resp.status_code == 429 and not is_last_attempt:
            # Exponential backoff: 2s, 3s, 5s, 9s between attempts.
            wait = 2 ** attempt + 1
            logger.warning("Rate limited, waiting %ds...", wait)
            time.sleep(wait)
            continue
        # On the last attempt a 429 falls through to here and raises right
        # away; sleeping first would only delay the inevitable failure.
        resp.raise_for_status()
        return resp.json()
    # Only reachable if max_attempts were 0; keeps the return type total.
    return []
def fetch_historical_candles(symbol: str, timeframe: str, start_ms: int, end_ms: int) -> pd.DataFrame:
    """Download every candle in ``[start_ms, end_ms]`` by paging through the API.

    Pages are requested one after another, each starting 1 ms after the
    newest candle already received, with a 1.5 s pause between pages.

    Returns:
        A DataFrame with columns ``timestamp, open, close, high, low, volume``,
        de-duplicated on ``timestamp`` and sorted ascending with a fresh index.
        An empty, column-less DataFrame is returned when nothing was fetched.
    """
    rows = []
    cursor = start_ms
    while cursor < end_ms:
        page = _fetch_candles_page(symbol, timeframe, cursor, end_ms)
        if not page:
            break
        rows.extend(page)
        newest_ts = page[-1][0]
        # Stop if the API made no forward progress, to avoid looping forever.
        if newest_ts <= cursor:
            break
        cursor = newest_ts + 1
        time.sleep(1.5)

    if not rows:
        return pd.DataFrame()

    column_names = ["timestamp", "open", "close", "high", "low", "volume"]
    frame = pd.DataFrame(rows, columns=column_names)
    frame["timestamp"] = pd.to_datetime(frame["timestamp"], unit="ms")
    unique_rows = frame.drop_duplicates(subset=["timestamp"])
    ordered = unique_rows.sort_values("timestamp")
    return ordered.reset_index(drop=True)
def _cache_path(symbol: str, timeframe: str) -> str:
    """Build the CSV cache path for a symbol/timeframe pair.

    Creates ``CACHE_DIR`` if it does not exist yet. Colons in the symbol are
    replaced with underscores before it is used in the file name.
    """
    os.makedirs(CACHE_DIR, exist_ok=True)
    filename = f"{symbol.replace(':', '_')}_{timeframe}.csv"
    return os.path.join(CACHE_DIR, filename)
def load_or_fetch_perp(start_date: str, end_date: str) -> pd.DataFrame:
    """Load BTC perpetual (tBTCF0:USTF0) 1h candles for basis/funding calculation.

    The on-disk cache is used when, restricted to the requested date range,
    it still holds at least 50% of the hourly candles expected for that
    range; otherwise the data is fetched again and the cache is overwritten.

    Args:
        start_date: Start of the range, in any form ``pd.Timestamp`` accepts.
        end_date: End of the range, in any form ``pd.Timestamp`` accepts.

    Returns:
        DataFrame with columns: timestamp, open, close, high, low, volume.
        Returns empty DataFrame on failure (graceful degradation).
    """
    symbol = "tBTCF0:USTF0"
    start_ms = int(pd.Timestamp(start_date).timestamp() * 1000)
    end_ms = int(pd.Timestamp(end_date).timestamp() * 1000)
    cache = _cache_path(symbol, "1h")

    range_hours = (end_ms - start_ms) / 3_600_000
    min_expected = int(range_hours * 0.5)

    if os.path.exists(cache):
        try:
            df = pd.read_csv(cache, parse_dates=["timestamp"])
            # Keep only candles inside the requested window, so a cache built
            # for a different date range is not mistaken for a valid one.
            window_start = pd.to_datetime(start_ms, unit="ms")
            window_end = pd.to_datetime(end_ms, unit="ms")
            in_window = (df["timestamp"] >= window_start) & (df["timestamp"] <= window_end)
            df = df[in_window].reset_index(drop=True)
        except Exception as e:
            # A corrupt or unreadable cache must not abort the run.
            logger.warning("Unreadable perp cache %s: %s (re-fetching)", cache, e)
        else:
            if not df.empty and len(df) >= min_expected:
                logger.info("Loaded perp %s from cache (%d candles)", symbol, len(df))
                return df
            logger.info("Perp cache too small (%d < %d), re-fetching", len(df), min_expected)

    try:
        logger.info("Fetching %s 1h from Bitfinex...", symbol)
        df = fetch_historical_candles(symbol, "1h", start_ms, end_ms)
    except Exception as e:
        logger.warning("Failed to fetch perp data %s: %s (continuing without)", symbol, e)
        return pd.DataFrame()

    if not df.empty:
        try:
            # Overwrites any stale cache; the old file is only replaced once
            # fresh data is actually available.
            df.to_csv(cache, index=False)
        except OSError as e:
            # A cache write failure must not discard successfully fetched data.
            logger.warning("Could not write perp cache %s: %s", cache, e)
        logger.info("Fetched %s 1h: %d candles", symbol, len(df))
    return df
def _load_symbol_timeframe(sym: str, tf: str, start_ms: int, end_ms: int, min_expected: int) -> pd.DataFrame:
    """Return candles for one symbol/timeframe from cache, else fetch and cache them."""
    cache = _cache_path(sym, tf)

    if os.path.exists(cache):
        try:
            cached = pd.read_csv(cache, parse_dates=["timestamp"])
            # Keep only candles inside the requested window, so a cache built
            # for a different date range is not mistaken for a valid one.
            window_start = pd.to_datetime(start_ms, unit="ms")
            window_end = pd.to_datetime(end_ms, unit="ms")
            in_window = (cached["timestamp"] >= window_start) & (cached["timestamp"] <= window_end)
            cached = cached[in_window].reset_index(drop=True)
        except Exception as e:
            # A corrupt or unreadable cache must not abort the whole load.
            logger.warning("Unreadable cache %s: %s (re-fetching)", cache, e)
        else:
            if not cached.empty and len(cached) >= min_expected:
                logger.info("Loaded %s %s from cache (%d candles)", sym, tf, len(cached))
                return cached
            logger.info("Cache %s %s too small (%d < %d), re-fetching", sym, tf, len(cached), min_expected)

    try:
        logger.info("Fetching %s %s from Bitfinex...", sym, tf)
        df = fetch_historical_candles(sym, tf, start_ms, end_ms)
    except Exception as e:
        logger.warning("Failed to fetch %s %s: %s (skipping)", sym, tf, e)
        return pd.DataFrame()

    if df.empty:
        return df

    try:
        # Overwrites any stale cache; the old file is only replaced once
        # fresh data is actually available.
        df.to_csv(cache, index=False)
    except OSError as e:
        # A cache write failure must not discard successfully fetched data.
        logger.warning("Could not write cache %s: %s", cache, e)
    logger.info("Fetched %s %s: %d candles", sym, tf, len(df))
    # Pause between downloads to stay clear of the API rate limit.
    time.sleep(1.5)
    return df


def load_or_fetch(symbols: list[str], start_date: str, end_date: str) -> dict[str, dict]:
    """Load from cache if available, otherwise fetch and cache.

    A cache file is accepted when, restricted to the requested date range,
    it still holds at least 50% of the candles expected for that range.
    A symbol/timeframe that cannot be loaded or fetched yields an empty
    DataFrame instead of raising.

    Args:
        symbols: Bitfinex symbols to load.
        start_date: Start of the range, in any form ``pd.Timestamp`` accepts.
        end_date: End of the range, in any form ``pd.Timestamp`` accepts.

    Returns:
        {symbol: {"candles_5m": DataFrame, "candles_1h": DataFrame}}.
    """
    start_ms = int(pd.Timestamp(start_date).timestamp() * 1000)
    end_ms = int(pd.Timestamp(end_date).timestamp() * 1000)

    # Calculate expected minimum candle count for the date range
    range_hours = (end_ms - start_ms) / 3_600_000
    min_candles = {
        "5m": int(range_hours * 12 * 0.5),  # at least 50% of expected
        "1h": int(range_hours * 0.5),
    }

    data = {}
    for sym in symbols:
        data[sym] = {
            f"candles_{tf}": _load_symbol_timeframe(sym, tf, start_ms, end_ms, min_expected)
            for tf, min_expected in min_candles.items()
        }
    return data