Jump to content
Main menu
Main menu
move to sidebar
hide
Navigation
Main page
Recent changes
Random page
freem
Search
Search
Appearance
Create account
Log in
Personal tools
Create account
Log in
Pages for logged out editors
learn more
Contributions
Talk
Editing
Openai/6936126b-3ed0-8003-aa64-374e59b4ad27
(section)
Add languages
Page
Discussion
English
Read
Edit
Edit source
View history
Tools
Tools
move to sidebar
hide
Actions
Read
Edit
Edit source
View history
General
What links here
Related changes
Special pages
Page information
Appearance
move to sidebar
hide
Warning:
You are not logged in. Your IP address will be publicly visible if you make any edits. If you
log in
or
create an account
, your edits will be attributed to your username, along with other benefits.
Anti-spam check. Do
not
fill this in!
=== Script (copy & paste) === <syntaxhighlight lang="python"># top5_yearly_backtest.py === Precise backtest of: invest Β£20,000 each year on 7-Dec (2020..2024), === === buy equal-weight top-5 US stocks by market cap on that date, === === sell only on the yearly re-check if a holding is no longer top-5, === === reinvest free cash same day equally into current top-5, === === 0.1% transaction cost per buy or sell, dividends reinvested, === === fractional shares allowed, report in GBP (uses historical USD->GBP FX). === === # NOTE: This script uses yfinance to download prices & dividends and Wikipedia === === to obtain an S&P500 (candidate universe) list. Market cap on a date is computed === === as (close price on that date) * (sharesOutstanding). sharesOutstanding can === === change over time; yfinance may not provide historical sharesOutstanding for === === older dates β it will use the latest available sharesOutstanding when historical === === snapshots are unavailable. This is a standard practical approach, but keep the caveat. === === # Requires: pip install yfinance pandas numpy scipy matplotlib openpyxl requests lxml tqdm === import os import math import time from datetime import datetime, timedelta import pandas as pd import numpy as np import yfinance as yf import requests from tqdm import tqdm from scipy.optimize import newton import matplotlib.pyplot as plt plt.rcParams["figure.figsize"] = (10,6) === ----------------------- === === USER SETTINGS / RULES === === ----------------------- === CONTRIBUTION_GBP = 20000.0 CONTRIB_DATES = ["2020-12-07", "2021-12-07", "2022-12-07", "2023-12-07", "2024-12-07"] TRADE_DATES = [pd.to_datetime(d) for d in CONTRIB_DATES] # each year we trade at close of this date TX_COST = 0.001 # 0.1% per trade BASE_CURRENCY = "GBP" USD_GBP_PAIR = "GBPUSD=X" # Yahoo ticker; this returns GBP per USD; USD->GBP = 1 / (GBPUSD) SEARCH_UNIVERSE_SOURCE = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies" # we'll use S&P500 constituents as candidate universe === Date range for historical pricing (extend a little before first contrib) === START_DATE = (TRADE_DATES[0] - pd.Timedelta(days=7)).strftime("%Y-%m-%d") END_DATE = (pd.to_datetime("2025-12-07") + pd.Timedelta(days=1)).strftime("%Y-%m-%d") === Output files === OUT_DIR = "." NAV_CSV = os.path.join(OUT_DIR, "daily_nav_gbp.csv") TRADES_CSV = os.path.join(OUT_DIR, "trades_log.csv") SUMMARY_TXT = os.path.join(OUT_DIR, "summary.txt") CHART_PNG = os.path.join(OUT_DIR, "portfolio_value.png") === ----------------------- === === Helper functions === === ----------------------- === def fetch_sp500_tickers(): """Scrape S&P500 tickers from Wikipedia as candidate universe.""" r = requests.get(SEARCH_UNIVERSE_SOURCE, timeout=30) df = pd.read_html(r.text)[0] tickers = df['Symbol'].tolist() # Yahoo uses BRK-B -> BRK-B or BRK.B; yfinance expects BRK-B as BRK-B or BRK.B? tickers = [t.replace('.', '-') for t in tickers] return tickers def download_price_data(tickers, start, end): """Download daily adjusted close and dividends for tickers via yfinance.""" # Use yfinance multi-ticker download data = {} # to be nice to yfinance servers, do in chunks chunk_size = 40 for i in range(0, len(tickers), chunk_size): chunk = tickers[i:i+chunk_size] yfdata = yf.download(chunk, start=start, end=end, auto_adjust=False, progress=False, threads=True) # yf returns multi-level columns; we will keep 'Adj Close' and 'Close' and 'Dividends' # Also get dividends per ticker via Ticker.actions if needed for t in chunk: try: tkr = yf.Ticker(t) hist = tkr.history(start=start, end=end, actions=True, auto_adjust=False) # hist contains 'Close', 'Dividends', 'Stock Splits', 'Open','High','Low','Volume' data[t] = hist except Exception as e: print("Warning: could not download", t, e) data[t] = pd.DataFrame() time.sleep(0.5) return data def get_shares_outstanding(ticker): """Try to get sharesOutstanding from yfinance info; if missing, return NaN.""" try: info = yf.Ticker(ticker).info so = info.get("sharesOutstanding", np.nan) return so except Exception: return np.nan def get_price_on_date(df_hist, date): """Return the close price for that date; if market closed, use the last prior available close.""" d = pd.to_datetime(date) if df_hist is None or df_hist.empty: return np.nan # hist index is DatetimeIndex # find last available index <= d s = df_hist.loc[:d] if s.empty: return np.nan # pick the last row's Close return float(s['Close'].iloc[-1]) def get_adjclose_on_date(df_hist, date): d = pd.to_datetime(date) if df_hist is None or df_hist.empty: return np.nan s = df_hist.loc[:d] if s.empty: return np.nan # compute adjusted close from 'Close' using splits/dividends not straightforward; # better to use 'Close' and handle dividends via 'Dividends' events (we will reinvest dividends on payment date). return float(s['Close'].iloc[-1]) def download_fx_pair(pair, start, end): # yfinance ticker for GBP per USD is 'GBPUSD=X' (gives GBP per USD). We want USD->GBP rate = 1 / (GBPUSD) df = yf.download(pair, start=start, end=end, progress=False) if df is None or df.empty: raise RuntimeError("Failed to download FX pair {}".format(pair)) df = df['Close'].rename("fx_rate") # GBP per USD # compute USD->GBP = fx_rate (GBP per USD), so multiplying USD amount by fx_rate -> GBP return df def money_weighted_irr(dates, cash_flows): # cash_flows: list of flows at dates (negative = outflow from investor, positive = inflow) # We want IRR for the series; convert to times in years relative to first date # We'll compute X such that NPV = sum(cf_i / (1+X)^(t_i)) = 0 times = np.array([(d - dates[0]).days / 365.25 for d in dates]) def npv(rate): return np.sum(cash_flows / ((1+rate) ** times)) try: irr = newton(lambda r: npv(r), 0.2, maxiter=200) return irr except Exception: # fallback: try bruteforce for guess in np.linspace(-0.9, 5, 300): try: val = npv(guess) if abs(val) < 1e-6: return guess except Exception: continue return np.nan def max_drawdown(series): peak = series.cummax() drawdown = (series - peak) / peak mdd = drawdown.min() return float(mdd) === ----------------------- === === Workflow === === ----------------------- === print("1) Building candidate universe (S&P500)...") tickers = fetch_sp500_tickers() print(f"Found {len(tickers)} tickers. Using them as candidate universe.") print("2) Downloading historical price & dividend data for candidate universe (this can take several minutes)...") hist_data = download_price_data(tickers, START_DATE, END_DATE) print("3) Downloading USD->GBP FX time series...") fx_series = download_fx_pair(USD_GBP_PAIR, START_DATE, END_DATE) # GBP per USD === For each TRADE_DATE, compute market caps and pick top-5 === top5_by_date = {} print("4) Determining top-5 by market cap on each trade date (approx using sharesOutstanding)...") for d in TRADE_DATES: mc_list = [] for t in tqdm(tickers, desc=f"marketcap {d.date()}"): hist = hist_data.get(t) price = get_price_on_date(hist, d) if np.isnan(price): continue so = get_shares_outstanding(t) if np.isnan(so): # skip if no SO continue mc = price * so mc_list.append((t, mc)) mc_df = pd.DataFrame(mc_list, columns=["ticker", "market_cap"]).dropna().sort_values("market_cap", ascending=False) top5 = mc_df.head(5)['ticker'].tolist() top5_by_date[d.strftime("%Y-%m-%d")] = top5 print(f"{d.date()}: top5 = {top5}") === ----------------------- === === Now run simulation: === === state: === === positions: dict ticker -> shares === === cash_gbp: float === === we'll keep accounting in GBP: contributions are GBP, but trades are priced in USD -> convert to GBP using fx on that day. === positions = {} # ticker -> shares cash_gbp = 0.0 nav_daily = [] # list of (date, nav_gbp) trades = [] # records of trades === create unified date index from first price date to END_DATE === all_dates = pd.date_range(start=START_DATE, end=END_DATE, freq='B') # business days === But use actual trading days from FX series (which uses daily) === all_dates = fx_series.index.intersection(pd.DatetimeIndex(all_dates)) all_dates = sorted(all_dates) === helper to compute portfolio NAV in GBP on a date: === def portfolio_nav_gbp(date): total = cash_gbp for t, shares in positions.items(): hist = hist_data.get(t) if hist is None or hist.empty: price = np.nan else: price = get_price_on_date(hist, date) if np.isnan(price): # skip (treat as zero temporarily) continue # FX: USD -> GBP multiply by fx_rate (GBP per USD) fx = fx_series.loc[:date].iloc[-1] total += shares '' price '' fx return float(total) === helper to execute market-close trades (buys and sells) === def execute_trade(date, ticker, shares_delta): """shares_delta positive -> buy; negative -> sell. Price used = Close price on date (USD). Apply TX cost on cash out/in. All values converted to GBP using fx on that date. """ global cash_gbp, positions hist = hist_data.get(ticker) if hist is None or hist.empty: raise RuntimeError(f"No price series for {ticker}") price_usd = get_price_on_date(hist, date) if np.isnan(price_usd): raise RuntimeError(f"No price for {ticker} on {date}") fx = fx_series.loc[:date].iloc[-1] trade_value_usd = shares_delta * price_usd trade_value_gbp = trade_value_usd * fx # transaction cost tc = abs(trade_value_gbp) * TX_COST # update cash and positions # buying shares: cash -= value + tc # selling shares: cash += (-value) - tc because trade_value_usd negative when selling? simpler: if shares_delta > 0: # buy cash_gbp -= trade_value_gbp cash_gbp -= tc positions[ticker] = positions.get(ticker, 0.0) + shares_delta trades.append({ "date": date.strftime("%Y-%m-%d"), "ticker": ticker, "action": "BUY", "shares": shares_delta, "price_usd": price_usd, "fx_gbp_per_usd": fx, "value_gbp": -trade_value_gbp, "tx_cost_gbp": -tc }) else: # sell cash_gbp -= trade_value_gbp # shares_delta negative -> trade_value_gbp negative; subtracting negative => add cash_gbp -= tc positions[ticker] = positions.get(ticker, 0.0) + shares_delta # if shares become ~0 set to 0 if abs(positions[ticker]) < 1e-9: positions[ticker] = 0.0 trades.append({ "date": date.strftime("%Y-%m-%d"), "ticker": ticker, "action": "SELL", "shares": shares_delta, "price_usd": price_usd, "fx_gbp_per_usd": fx, "value_gbp": -trade_value_gbp, "tx_cost_gbp": -tc }) === process timeline === print("5) Running simulation over timeline and executing yearly rebalances on trade dates...") contrib_idx = 0 for current_date in tqdm(all_dates, desc="Simulating"): # if there is a contribution on this date, add GBP to cash then perform reallocation after transaction/sell rules if contrib_idx < len(TRADE_DATES) and pd.to_datetime(current_date).date() == TRADE_DATES[contrib_idx].date(): # add contribution cash_gbp += CONTRIBUTION_GBP # Yearly re-check: determine top-5 at this date key = TRADE_DATES[contrib_idx].strftime("%Y-%m-%d") current_top5 = top5_by_date.get(key, []) # SELL any holdings that are not in current_top5 for t in list(positions.keys()): if positions.get(t,0) > 0 and t not in current_top5: # sell all shares shares_to_sell = -positions[t] execute_trade(current_date, t, shares_to_sell) # After sells, invest free cash equally into current top-5. # But if we already hold positions in current top5, we do not sell them; we just invest free cash equally across current top5. # Compute equal GBP allocation per ticker if len(current_top5) == 0: pass else: alloc_each = cash_gbp / len(current_top5) # For each ticker, compute how many shares to buy to reach +alloc_each (we buy additional shares) for t in current_top5: # price USD and fx hist = hist_data.get(t) if hist is None or hist.empty: continue price_usd = get_price_on_date(hist, current_date) if np.isnan(price_usd): continue fx = fx_series.loc[:current_date].iloc[-1] price_gbp = price_usd * fx # desired purchase GBP = alloc_each if price_gbp <= 0: continue shares = alloc_each / price_gbp if shares <= 1e-12: continue execute_trade(current_date, t, shares) contrib_idx += 1 # For each date we should also apply dividend events (reinvest) # We'll scan each ticker's historical 'Dividends' on that date and reinvest dividends for t in list(positions.keys()): if positions.get(t,0) <= 0: continue hist = hist_data.get(t) if hist is None or hist.empty: continue # check if a dividend occurred on this date if 'Dividends' in hist.columns: try: div_amount = float(hist.loc[current_date:current_date]['Dividends'].sum()) except Exception: div_amount = 0.0 if div_amount > 0: # dividend per share in USD, total USD = div_amount * shares shares_held = positions.get(t,0) total_div_usd = div_amount * shares_held fx = fx_series.loc[:current_date].iloc[-1] total_div_gbp = total_div_usd * fx # Reinvest into same ticker at that date's Close price price_usd = get_price_on_date(hist, current_date) if not np.isnan(price_usd) and price_usd>0: shares_to_buy = total_div_usd / price_usd # Apply no tx_cost for dividend reinvest? User said dividends reinvested; typically no trading cost, but we will '''reinvest without tx cost''' (common assumption). If you want tx cost apply it here. positions[t] = positions.get(t,0) + shares_to_buy trades.append({ "date": current_date.strftime("%Y-%m-%d"), "ticker": t, "action": "DIV_REINVEST", "shares": shares_to_buy, "price_usd": price_usd, "fx_gbp_per_usd": fx, "value_gbp": -total_div_gbp, "tx_cost_gbp": 0.0 }) else: # if price missing, add cash instead to cash_gbp cash_gbp += total_div_gbp # record NAV nav = portfolio_nav_gbp(current_date) nav_daily.append({"date": current_date.strftime("%Y-%m-%d"), "nav_gbp": nav, "cash_gbp": cash_gbp}) === end loop === === Save outputs === nav_df = pd.DataFrame(nav_daily).set_index(pd.to_datetime(nav_daily and [r['date'] for r in nav_daily])) nav_df.to_csv(NAV_CSV) trades_df = pd.DataFrame(trades) trades_df.to_csv(TRADES_CSV, index=False) === compute summary stats === final_nav = nav_df['nav_gbp'].iloc[-1] total_invested = CONTRIBUTION_GBP * len(CONTRIB_DATES) === IRR (money-weighted) === flow_dates = [pd.to_datetime(d) for d in CONTRIB_DATES] + [nav_df.index[-1]] flows = [-CONTRIBUTION_GBP]*len(CONTRIB_DATES) + [final_nav] irr = money_weighted_irr(flow_dates + [nav_df.index[-1]], np.array(flows + [0.0])) # quick attempt; fallback below === Better: compute X solving sum(cf_i/(1+r)^(t_i))=0 where cf_i for contributions negative and final positive. === === We'll build flows with dates: contributions negative at each contrib date; final positive at final date === dates_for_irr = [pd.to_datetime(d) for d in CONTRIB_DATES] + [nav_df.index[-1]] cfs_for_irr = np.array([-CONTRIBUTION_GBP]*len(CONTRIB_DATES) + [final_nav]) try: irr_val = money_weighted_irr(dates_for_irr, cfs_for_irr) except Exception: irr_val = np.nan === annualised volatility: use daily returns of NAV === nav_series = nav_df['nav_gbp'] daily_ret = nav_series.pct_change().dropna() ann_vol = daily_ret.std() * np.sqrt(252) === CAGR from first contribution date to final === t_years = (nav_df.index[-1] - pd.to_datetime(CONTRIB_DATES[0])).days / 365.25 cagr = (final_nav / (CONTRIBUTION_GBP)) ** (1 / t_years) - 1 # note: this is not accurate since contributions are staggered; IRR is better mdd = max_drawdown(nav_series) with open(SUMMARY_TXT, "w") as f: f.write(f"Final NAV (GBP): {final_nav:,.2f}\n") f.write(f"Total invested (GBP): {total_invested:,.2f}\n") f.write(f"Approx money-weighted IRR: {irr_val:.4%}\n") f.write(f"Approx CAGR (naive): {cagr:.4%}\n") f.write(f"Annualised vol (daily returns): {ann_vol:.4%}\n") f.write(f"Max drawdown: {mdd:.4%}\n") f.write(f"Trades saved to {TRADES_CSV}\n") f.write(f"Daily NAV saved to {NAV_CSV}\n") print("Summary written to", SUMMARY_TXT) === Chart === plt.plot(nav_df.index, nav_df['nav_gbp']) plt.title("Portfolio NAV (GBP)") plt.xlabel("Date") plt.ylabel("NAV (GBP)") plt.grid(True) plt.savefig(CHART_PNG, bbox_inches='tight') print("Chart saved to", CHART_PNG) print("Done. Files generated:", NAV_CSV, TRADES_CSV, SUMMARY_TXT, CHART_PNG) </syntaxhighlight>
Summary:
Please note that all contributions to freem are considered to be released under the Creative Commons Attribution-ShareAlike 4.0 (see
Freem:Copyrights
for details). If you do not want your writing to be edited mercilessly and redistributed at will, then do not submit it here.
You are also promising us that you wrote this yourself, or copied it from a public domain or similar free resource.
Do not submit copyrighted work without permission!
Cancel
Editing help
(opens in new window)