diff options
Diffstat (limited to 'notebooks/2024-11-29-election-petition.py')
-rw-r--r-- | notebooks/2024-11-29-election-petition.py | 286 |
1 files changed, 286 insertions, 0 deletions
# /// script
# requires-python = ">=3.13"
# dependencies = [
#     "marimo",
#     "matplotlib==3.10.5",
#     "pandas==2.3.1",
#     "python-dotenv==1.1.1",
#     "seaborn==0.13.2",
# ]
# ///

# marimo notebook: compares per-constituency signature counts for UK Parliament
# petition 700143 with the 2024 general election results.
# (Recovered from the "new file" diff of
# notebooks/2024-11-29-election-petition.py, index 0000000..0666c7f.)

import marimo

__generated_with = "0.14.17"
app = marimo.App()


@app.cell(hide_code=True)
def _(mo):
    mo.md(
        r"""
    # Analysing the Petition for a General Election

    2024 general election results were downloaded from [here](https://commonslibrary.parliament.uk/research-briefings/cbp-10009/) on 29 November 2024. Petition results were downloaded using the following command on 29 November 2024:

    ```shell
    curl https://petition.parliament.uk/petitions/700143.json | jq > data/election-petition/election-petition-results.json
    ```
    """
    )
    return


@app.cell
def _():
    # Shared imports, input file locations and plot styling for the notebook.
    import json
    from pathlib import Path

    import marimo as mo
    import matplotlib.pyplot as plt
    import pandas as pd
    import seaborn as sns
    from dotenv import find_dotenv

    # find_dotenv is used purely as an "search upwards for this file" helper to
    # locate the project root. Per its documentation it returns "" when nothing
    # is found, in which case the paths below resolve relative to the current
    # working directory.
    ROOT_DIR = Path(find_dotenv("pyproject.toml")).parent
    DATA_DIR = ROOT_DIR / "data"
    ELECTION_RESULTS_PATH = (
        DATA_DIR / "election-petition" / "HoC-GE2024-results-by-candidate.csv"
    )
    PETITION_RESULTS_PATH = (
        DATA_DIR / "election-petition" / "election-petition-results.json"
    )

    # Parties that get their own facet in the plots. The order matters: the
    # plotting cells slice this list (TOP_PARTIES[:n]) and use it as hue order.
    TOP_PARTIES = [
        "Conservative",
        "Labour",
        "Liberal Democrat",
        "Reform UK",
        "Green",
        "Scottish National Party",
        "Plaid Cymru",
        "Independent",
    ]

    # Listed in the same order as TOP_PARTIES so that the default palette set
    # below lines up with hue_order=TOP_PARTIES[...] in the plotting cells.
    PARTY_COLOURS = {
        "Conservative": "#0087DC",
        "Labour": "#E4003B",
        "Liberal Democrat": "#FAA61A",
        "Reform UK": "#12B6CF",
        "Green": "#02A95B",
        "Scottish National Party": "#FDF38E",
        "Plaid Cymru": "#005B54",
        "Independent": "grey",
    }

    sns.set_style("ticks")
    sns.set_palette(PARTY_COLOURS.values())
    return (
        ELECTION_RESULTS_PATH,
        PETITION_RESULTS_PATH,
        TOP_PARTIES,
        json,
        mo,
        pd,
        sns,
    )


@app.cell
def _(ELECTION_RESULTS_PATH, PETITION_RESULTS_PATH, json, pd):
    # Build `df`: one row per election candidate, enriched with the petition
    # signature count of the candidate's constituency.
    with PETITION_RESULTS_PATH.open("r", encoding="utf-8") as f:
        petition_results_json = json.load(f)

    petition_results_df = pd.DataFrame.from_records(
        petition_results_json["data"]["attributes"]["signatures_by_constituency"],
        exclude=["name", "mp"],
    ).rename(columns={"ons_code": "ons_id"})

    df = (
        pd.read_csv(
            ELECTION_RESULTS_PATH,
            header=0,
            usecols=[
                "ONS ID",
                "Constituency name",
                "Region name",
                "Party name",
                "Votes",
                "Share",
            ],
        )
        .rename(
            columns={
                "ONS ID": "ons_id",
                "Constituency name": "constituency",
                "Region name": "region",
                "Party name": "party",
                "Votes": "votes",
                "Share": "share",
            }
        )
        # Count "Labour and Co-operative" candidates as Labour
        # (original author's note: "Labour in Wales").
        .replace({"party": {"Labour and Co-operative": "Labour"}})
        # Left merge: candidates in constituencies without petition data are
        # kept, with signature_count set to NaN.
        .merge(petition_results_df, on="ons_id", how="left")
    )

    _votes_by_constituency = df.groupby("ons_id")["votes"]
    df = df.assign(
        # Total votes cast in the candidate's constituency.
        total_votes=_votes_by_constituency.transform("sum"),
        signatures_per_vote=lambda x: x["signature_count"] / x["total_votes"],
        # Exactly one winner per constituency: the row with the most votes
        # (idxmax returns the first such row on a tie). This yields a genuine
        # bool column rather than relying on fillna() downcasting an object
        # column, which pandas has deprecated.
        winner=df.index.isin(_votes_by_constituency.idxmax()),
    )

    _reported_total = petition_results_json["data"]["attributes"]["signature_count"]
    # signature_count is repeated on every candidate row of a constituency, so
    # take one value per constituency before summing. The sum is a float when
    # the merge introduced NaNs; cast so the ",d" format below cannot fail.
    _constituency_total = int(df.groupby("ons_id")["signature_count"].max().sum())

    print(f"Total petition signatures: {_reported_total:,d}")
    print(
        "Total petition signatures from per-constituency counts: "
        f"{_constituency_total:,d}"
    )
    return (df,)


@app.cell
def _(df):
    df
    return


@app.cell
def _(df, sns):
    # One signature count per constituency (the value is duplicated across
    # that constituency's candidate rows).
    _ax = sns.histplot(df.groupby("ons_id")["signature_count"].max())
    _ax.set(
        xlabel="Signatures",
        ylabel="Constituencies",
        title="Distribution of Petition Signatures by Constituency",
    )
    _ax
    return


@app.cell
def _(mo):
    mo.md(r"""### Signature Distribution""")
    return


@app.cell
def _(TOP_PARTIES, df, sns):
    # Signature counts of constituencies, faceted by the winning party
    # (restricted to the first three entries of TOP_PARTIES).
    _top_n = 3
    _g = (
        sns.displot(
            df[df["winner"] & df["party"].isin(TOP_PARTIES[:_top_n])],
            kind="hist",
            x="signature_count",
            hue="party",
            hue_order=TOP_PARTIES[:_top_n],
            col="party",
            col_order=TOP_PARTIES[:_top_n],
            height=3,
            legend=False,
        )
        .set_titles("{col_name}")
        .set_axis_labels("Signatures", "Constituencies")
    )
    _g.figure
    return


@app.cell
def _(mo):
    mo.md(r"""### Signatures per 2024 General Election Vote""")
    return


@app.cell
def _(TOP_PARTIES, df, sns):
    # Same faceting as the signature distribution, but normalised by the total
    # number of votes cast in each constituency.
    _top_n = 3
    _g = (
        sns.displot(
            df[df["winner"] & df["party"].isin(TOP_PARTIES[:_top_n])],
            kind="hist",
            x="signatures_per_vote",
            hue="party",
            hue_order=TOP_PARTIES[:_top_n],
            col="party",
            col_order=TOP_PARTIES[:_top_n],
            height=3,
            legend=False,
        )
        .set_titles("{col_name}")
        .set_axis_labels("Signatures per Vote", "Constituencies")
    )
    _g.figure
    return


@app.cell
def _(mo):
    mo.md(r"""### Signatures vs. Votes""")
    return


@app.cell
def _(TOP_PARTIES, df, sns):
    # Per-party regression of constituency signatures on the party's votes.
    _g = (
        sns.lmplot(
            df[df["party"].isin(TOP_PARTIES)],
            x="votes",
            y="signature_count",
            col="party",
            col_wrap=4,
            col_order=TOP_PARTIES,
            hue="party",
            hue_order=TOP_PARTIES,
            height=3,
        )
        .set_titles("{col_name}")
        .set_axis_labels("2024 General Election Votes", "Petition Signatures")
    )
    _g.figure
    return


@app.cell
def _(mo):
    mo.md(r"""### Signatures vs. Vote Share""")
    return


@app.cell
def _(TOP_PARTIES, df, sns):
    # Per-party regression of constituency signatures on the party's vote share.
    _g = (
        sns.lmplot(
            df[df["party"].isin(TOP_PARTIES)],
            x="share",
            y="signature_count",
            col="party",
            col_wrap=4,
            col_order=TOP_PARTIES,
            hue="party",
            hue_order=TOP_PARTIES,
            height=3,
        )
        .set_titles("{col_name}")
        .set_axis_labels("2024 General Election Vote Share", "Petition Signatures")
    )
    _g.figure
    return


@app.cell
def _(mo):
    mo.md(r"""### Signatures per Vote vs. Vote Share""")
    return


@app.cell
def _(TOP_PARTIES, df, sns):
    # Per-party regression of signatures-per-vote on the party's vote share.
    _g = (
        sns.lmplot(
            df[df["party"].isin(TOP_PARTIES)],
            x="share",
            y="signatures_per_vote",
            col="party",
            col_wrap=4,
            col_order=TOP_PARTIES,
            hue="party",
            hue_order=TOP_PARTIES,
            height=3,
        )
        .set_titles("{col_name}")
        .set_axis_labels(
            "2024 General Election Vote Share", "Petition Signatures per Vote"
        )
    )
    _g.figure
    return


if __name__ == "__main__":
    app.run()