- hourly_trend_report.py: standalone cron script (XX:00:30) that sends the 1h bullish/bearish status
- slack_notifier.py: add send_market_trend_report() — simple bullish/bearish only, no entry signals
- main.py: log all 15 HOLD reasons (not just the first 3) for debugging all-HOLD cycles
- backtest/whale_correlation.py: blockchain.com on-chain correlation analysis (result: no signal)
- memory/: update project memory with architecture split, cron layout, feedback

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
238 lines
8.7 KiB
Python
238 lines
8.7 KiB
Python
#!/usr/bin/env python3
"""Fetch blockchain.com on-chain metrics and analyze their correlation with BTC price changes.

Whale proxy metrics (all free, daily granularity):
- estimated-transaction-volume (BTC): total estimated tx volume
- n-transactions: daily confirmed transaction count
- Derived: avg_tx_size = volume / n_transactions (whale activity proxy)
- output-volume (BTC): total output value
- estimated-transaction-volume-usd (USD): total estimated tx volume
- n-unique-addresses: unique addresses per day

Correlation targets:
- BTC next-day return
- BTC next-3-day return
- BTC next-5-day return
"""
|
|
|
|
import os
|
|
import sys
|
|
import time
|
|
|
|
import pandas as pd
|
|
import numpy as np
|
|
import requests
|
|
|
|
# Directory containing this script; every path below is built relative to it.
_THIS_DIR = os.path.dirname(__file__)

# Put the parent of this script's directory at the front of the import path.
sys.path.insert(0, os.path.join(_THIS_DIR, ".."))

# Backtest cache directory and the cached 1h BTC file read by
# load_btc_daily_prices().
CACHE_DIR = os.path.join(_THIS_DIR, "..", "cache", "backtest")
BTC_1H_CACHE = os.path.join(CACHE_DIR, "tBTCUST_1h.csv")

# Base URL for chart requests; the chart name is appended as a path segment.
BLOCKCHAIN_API = "https://api.blockchain.info/charts"

# Chart names requested from BLOCKCHAIN_API, one request per entry.
METRICS = [
    "estimated-transaction-volume",      # total estimated tx volume, in BTC
    "estimated-transaction-volume-usd",  # total estimated tx volume, in USD
    "n-transactions",                    # confirmed transactions per day
    "output-volume",                     # total output value, in BTC
    "n-unique-addresses",                # unique addresses per day
]
|
|
|
|
|
|
def fetch_blockchain_metric(name: str, start: str, end: str) -> pd.DataFrame:
    """Fetch a single blockchain.com chart metric as a per-date frame.

    Args:
        name: Chart name appended to ``BLOCKCHAIN_API`` (for example
            ``"n-transactions"``); also used as the value column name.
        start: First date to keep, in any form ``pd.Timestamp`` accepts.
        end: Last date to keep (inclusive), in the same form as ``start``.

    Returns:
        A DataFrame with the columns ``["date", name]`` restricted to
        ``start <= date <= end``. When the API returns no values the frame
        is empty but still carries both columns, so callers can merge on
        ``"date"`` without a ``KeyError``.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            error HTTP status (via ``raise_for_status``).
    """
    start_stamp = pd.Timestamp(start)
    end_stamp = pd.Timestamp(end)
    start_date = start_stamp.date()
    end_date = end_stamp.date()

    # Size the requested window from the date range itself rather than a
    # fixed "1year", which silently truncated longer ranges. The API counts
    # the timespan forward from ``start``.
    span_days = max(1, (end_date - start_date).days + 1)
    params = {
        "format": "json",
        "start": int(start_stamp.timestamp()),
        "timespan": f"{span_days}days",
    }
    resp = requests.get(f"{BLOCKCHAIN_API}/{name}", params=params, timeout=30)
    resp.raise_for_status()

    values = resp.json().get("values", [])
    if not values:
        # Keep the schema on the empty path so downstream merges still work.
        return pd.DataFrame(columns=["date", name])

    df = pd.DataFrame(values)
    # Assumes each entry has exactly two fields, epoch seconds then the
    # value, in that order — TODO confirm against the API response.
    df.columns = ["timestamp", name]
    df["date"] = pd.to_datetime(df["timestamp"], unit="s").dt.date
    df = df[["date", name]]

    # The API may return points outside the window; keep only the request.
    in_range = (df["date"] >= start_date) & (df["date"] <= end_date)
    return df[in_range]
|
|
|
|
|
|
def load_btc_daily_prices() -> pd.DataFrame:
    """Load the cached 1h BTC data and aggregate it to daily OHLCV.

    Reads the CSV at ``BTC_1H_CACHE``, which must provide the columns
    ``timestamp``, ``open``, ``high``, ``low``, ``close`` and ``volume``.

    Returns:
        One row per calendar date with the columns ``date``, ``open``,
        ``high``, ``low``, ``close`` and ``volume``.

    Raises:
        SystemExit: With status 1, after printing a hint, when the cache
            file does not exist.
    """
    if not os.path.exists(BTC_1H_CACHE):
        print(f"ERROR: BTC 1h cache not found at {BTC_1H_CACHE}")
        print("Run backtest first to populate the cache.")
        sys.exit(1)

    hourly = pd.read_csv(BTC_1H_CACHE, parse_dates=["timestamp"])
    # "first"/"last" below pick rows in frame order, so order by time first;
    # otherwise an unsorted cache would yield the wrong open/close. The
    # stable sort keeps file order for duplicate timestamps.
    hourly = hourly.sort_values("timestamp", kind="stable")
    hourly["date"] = hourly["timestamp"].dt.date

    return hourly.groupby("date").agg(
        open=("open", "first"),
        high=("high", "max"),
        low=("low", "min"),
        close=("close", "last"),
        volume=("volume", "sum"),
    ).reset_index()
|
|
|
|
|
|
def _fetch_metric_frames(start: str, end: str) -> list:
    """Fetch every chart in METRICS and return only the non-empty frames."""
    frames = []
    for metric in METRICS:
        print(f" {metric}...", end=" ", flush=True)
        try:
            df = fetch_blockchain_metric(metric, start, end)
        except Exception as e:
            # Deliberate best effort: one failing chart must not abort the run.
            print(f"FAILED: {e}")
        else:
            print(f"{len(df)} days")
            # An empty frame adds nothing and may lack the "date" merge key.
            if not df.empty:
                frames.append(df)
        # Pause between requests.
        # NOTE(review): the original comment cited a limit of 1 request per
        # 10 s, yet the pause is 2 s — confirm the actual limit.
        time.sleep(2)
    return frames


def _combine_onchain(frames: list) -> pd.DataFrame:
    """Outer-join metric frames on date and add average-transaction-size columns."""
    onchain = frames[0]
    for df in frames[1:]:
        onchain = onchain.merge(df, on="date", how="outer")
    onchain = onchain.sort_values("date").reset_index(drop=True)

    if "n-transactions" in onchain.columns:
        # Treat a zero count as missing: dividing by it gives inf, which
        # would poison the rolling mean/std for the whole window.
        tx_count = onchain["n-transactions"].replace(0, np.nan)
        derived = (
            ("estimated-transaction-volume", "avg_tx_size_btc"),
            ("estimated-transaction-volume-usd", "avg_tx_size_usd"),
        )
        for volume_col, size_col in derived:
            if volume_col in onchain.columns:
                onchain[size_col] = onchain[volume_col] / tx_count
    return onchain


def _add_forward_returns(merged: pd.DataFrame, horizons=(1, 3, 5)) -> list:
    """Add forward-looking ``ret_<h>d`` columns in place; return their names."""
    names = []
    for horizon in horizons:
        col = f"ret_{horizon}d"
        # pct_change looks backwards; shifting by -horizon aligns the return
        # over the NEXT `horizon` rows with the current row.
        merged[col] = merged["btc_close"].pct_change(horizon).shift(-horizon)
        names.append(col)
    return names


def _add_rolling_zscores(merged: pd.DataFrame, cols: list) -> list:
    """Add 30-row rolling z-score columns in place; return their names."""
    names = []
    for col in cols:
        window = merged[col].rolling(30, min_periods=10)
        # A flat window has zero std; map it to NaN instead of dividing by 0.
        spread = window.std().replace(0, np.nan)
        merged[f"{col}_zscore"] = (merged[col] - window.mean()) / spread
        names.append(f"{col}_zscore")
    return names


def _print_correlation_table(
    merged: pd.DataFrame, zscore_cols: list, target_cols: list
) -> pd.DataFrame:
    """Print Pearson correlations per metric/target; return them in long form."""
    print("\n" + "=" * 70)
    print(" PEARSON CORRELATION: On-Chain Metrics ↔ BTC Forward Returns")
    print("=" * 70)

    valid = merged.dropna(subset=target_cols + zscore_cols)
    print(f" (Using {len(valid)} complete observations)\n")

    results = []
    for oc_col in zscore_cols:
        for target in target_cols:
            corr = valid[oc_col].corr(valid[target])
            results.append({"metric": oc_col, "target": target, "corr": corr})
    results_df = pd.DataFrame(results)

    pivot = results_df.pivot(index="metric", columns="target", values="corr")
    # Strongest next-day relationship (by magnitude) first.
    pivot = pivot[target_cols].sort_values(target_cols[0], key=abs, ascending=False)

    # Column header goes above the rows it labels.
    header = " ".join(f"{t:>8s}" for t in target_cols)
    print(f" {'':35s} {header}")
    for metric in pivot.index:
        name = metric.replace("_zscore", "")
        vals = " ".join(f"{pivot.loc[metric, t]:+.4f}" for t in target_cols)
        print(f" {name:<35s} {vals}")
    return results_df


def _print_notable(results_df: pd.DataFrame) -> None:
    """Print every metric/target pair whose |r| exceeds 0.10."""
    print("\n" + "=" * 70)
    print(" NOTABLE CORRELATIONS (|r| > 0.10)")
    print("=" * 70)

    strong = results_df[results_df["corr"].abs() > 0.10]
    notable = strong.sort_values("corr", key=abs, ascending=False)
    if notable.empty:
        print(" None found — on-chain metrics show weak correlation with BTC returns.")
        return
    for row in notable.itertuples(index=False):
        # |r| > 0.10 here, so the correlation is never exactly zero.
        direction = "↑↑" if row.corr > 0 else "↓↑"
        name = row.metric.replace("_zscore", "")
        print(f" {direction} {name:<35s} → {row.target}: r={row.corr:+.4f}")


def _print_extreme_values(merged: pd.DataFrame) -> None:
    """Compare next-day returns after top/bottom-decile z-score days."""
    print("\n" + "=" * 70)
    print(" EXTREME VALUE ANALYSIS (Top/Bottom 10% Days)")
    print("=" * 70)

    for col_name in ["avg_tx_size_btc", "estimated-transaction-volume", "avg_tx_size_usd"]:
        zscore_col = f"{col_name}_zscore"
        if zscore_col not in merged.columns:
            continue

        valid_ext = merged.dropna(subset=[zscore_col, "ret_1d"])
        # Skip metrics with fewer than 20 usable rows.
        if len(valid_ext) < 20:
            continue

        q10 = valid_ext[zscore_col].quantile(0.10)
        q90 = valid_ext[zscore_col].quantile(0.90)
        low_days = valid_ext[valid_ext[zscore_col] <= q10]
        high_days = valid_ext[valid_ext[zscore_col] >= q90]
        all_avg = valid_ext["ret_1d"].mean()

        print(f"\n {col_name}:")
        print(f" Low activity days (bottom 10%): avg next-day ret = {low_days['ret_1d'].mean():+.4f} (n={len(low_days)})")
        print(f" High activity days (top 10%): avg next-day ret = {high_days['ret_1d'].mean():+.4f} (n={len(high_days)})")
        print(f" All days average: avg next-day ret = {all_avg:+.4f} (n={len(valid_ext)})")


def main(start: str = "2025-07-01", end: str = "2026-03-17"):
    """Run the on-chain vs. BTC forward-return correlation analysis.

    Fetches every chart in ``METRICS``, joins the result with daily BTC
    prices from the local cache, computes forward returns and rolling
    z-scores, prints the correlation and extreme-value reports, and writes
    the merged dataset to ``whale_correlation_data.csv`` in ``CACHE_DIR``.

    Args:
        start: First date of the analysis window (inclusive).
        end: Last date of the analysis window (inclusive).

    Returns:
        None. Returns early, after printing an error, when no metric could
        be fetched or when the on-chain and price data share no dates.
    """
    print("=== Whale Activity ↔ BTC Price Correlation Analysis ===\n")

    # Step 1: Fetch on-chain metrics
    print("Fetching blockchain.com metrics...")
    frames = _fetch_metric_frames(start, end)
    if not frames:
        print("ERROR: No metrics fetched")
        return
    onchain = _combine_onchain(frames)
    print(f"\nOn-chain data: {len(onchain)} days")

    # Step 2: Load BTC prices
    print("Loading BTC daily prices from cache...")
    btc = load_btc_daily_prices()
    print(f"BTC daily data: {len(btc)} days")

    # Step 3: Merge and compute returns
    merged = onchain.merge(btc[["date", "close", "volume"]], on="date", how="inner")
    merged = merged.rename(columns={"close": "btc_close", "volume": "btc_volume"})
    merged = merged.sort_values("date").reset_index(drop=True)
    if merged.empty:
        # Without this guard the date-range print below raises IndexError.
        print("ERROR: On-chain data and BTC prices share no dates")
        return

    # Everything that is not a key or price column is an on-chain metric.
    onchain_cols = [
        c for c in merged.columns if c not in ("date", "btc_close", "btc_volume")
    ]
    target_cols = _add_forward_returns(merged)
    zscore_cols = _add_rolling_zscores(merged, onchain_cols)

    # Step 4: Correlation analysis
    print(f"\nMerged dataset: {len(merged)} days")
    print(f"Date range: {merged['date'].iloc[0]} to {merged['date'].iloc[-1]}")
    results_df = _print_correlation_table(merged, zscore_cols, target_cols)

    # Step 5: Highlight significant correlations
    _print_notable(results_df)

    # Step 6: Extreme value analysis (whale spikes)
    _print_extreme_values(merged)

    # Save merged data for further analysis
    os.makedirs(CACHE_DIR, exist_ok=True)
    out_path = os.path.join(CACHE_DIR, "whale_correlation_data.csv")
    merged.to_csv(out_path, index=False)
    print(f"\nSaved merged dataset to {out_path}")
    print("Done.")
|
|
|
|
|
|
# Run the analysis only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()