aboutsummaryrefslogtreecommitdiff
path: root/notebooks/2024-11-29-election-petition.py
diff options
context:
space:
mode:
author: Paul Harrison <paul@harrison.sh> 2025-08-18 13:25:01 +0100
committer: Paul Harrison <paul@harrison.sh> 2025-08-18 13:25:01 +0100
commit: ae7ba4c8cbdfa772e9c6eddc5af4f313872957a0 (patch)
tree: 49c7de811214d8e1de3fa71da3b2dd1d549c81ac /notebooks/2024-11-29-election-petition.py
parent: 12241dbc3852415867f0b9e16c115cfe26c3476a (diff)
Switch from Jupyter to isolated Marimo notebooks (HEAD, main)
Diffstat (limited to 'notebooks/2024-11-29-election-petition.py')
-rw-r--r-- notebooks/2024-11-29-election-petition.py | 286
1 files changed, 286 insertions, 0 deletions
diff --git a/notebooks/2024-11-29-election-petition.py b/notebooks/2024-11-29-election-petition.py
new file mode 100644
index 0000000..0666c7f
--- /dev/null
+++ b/notebooks/2024-11-29-election-petition.py
@@ -0,0 +1,286 @@
+# /// script
+# requires-python = ">=3.13"
+# dependencies = [
+# "marimo",
+# "matplotlib==3.10.5",
+# "pandas==2.3.1",
+# "python-dotenv==1.1.1",
+# "seaborn==0.13.2",
+# ]
+# ///
+
+import marimo
+
+# NOTE(review): presumably the marimo version that wrote this file — confirm before editing by hand.
+__generated_with = "0.14.17"
+# The App object whose `cell` decorator is applied to every cell function below.
+app = marimo.App()
+
+
+# Introductory markdown cell: notebook title plus where and when the two datasets were obtained.
+@app.cell(hide_code=True)
+def _(mo):
+    mo.md(
+        r"""
+    # Analysing the Petition for a General Election
+
+    2024 general election results were downloaded from [here](https://commonslibrary.parliament.uk/research-briefings/cbp-10009/) on 29 November 2024. Petition results were downloaded using the following command on 29 November 2024:
+
+    ```shell
+    curl https://petition.parliament.uk/petitions/700143.json | jq > data/election-petition-results.json
+    ```
+    """
+    )
+    return
+
+
+@app.cell
+def _():
+    # Shared setup cell: imports, data file locations, party constants and the seaborn theme.
+    import json
+    from pathlib import Path
+
+    import marimo as mo
+    import matplotlib.pyplot as plt
+    import pandas as pd
+    import seaborn as sns
+    from dotenv import find_dotenv
+
+    # The directory containing pyproject.toml is used as the project root.
+    ROOT_DIR = Path(find_dotenv("pyproject.toml")).parent
+    DATA_DIR = ROOT_DIR / "data"
+    _petition_dir = DATA_DIR / "election-petition"
+    ELECTION_RESULTS_PATH = _petition_dir / "HoC-GE2024-results-by-candidate.csv"
+    PETITION_RESULTS_PATH = _petition_dir / "election-petition-results.json"
+
+    # Plot colour per party; the insertion order also defines the party ordering below.
+    PARTY_COLOURS = {
+        "Conservative": "#0087DC",
+        "Labour": "#E4003B",
+        "Liberal Democrat": "#FAA61A",
+        "Reform UK": "#12B6CF",
+        "Green": "#02A95B",
+        "Scottish National Party": "#FDF38E",
+        "Plaid Cymru": "#005B54",
+        "Independent": "grey",
+    }
+    TOP_PARTIES = list(PARTY_COLOURS)
+
+    sns.set_style("ticks")
+    sns.set_palette(PARTY_COLOURS.values())
+    return (
+        ELECTION_RESULTS_PATH,
+        PETITION_RESULTS_PATH,
+        TOP_PARTIES,
+        json,
+        mo,
+        pd,
+        sns,
+    )
+
+
+@app.cell
+def _(ELECTION_RESULTS_PATH, PETITION_RESULTS_PATH, json, pd):
+    # Build the analysis DataFrame: election results joined with petition
+    # signature counts, plus derived per-constituency columns.
+    with PETITION_RESULTS_PATH.open("r", encoding="utf-8") as f:
+        petition_results_json = json.load(f)
+
+    # Per-constituency signature counts, keyed by ONS code.
+    petition_results_df = (
+        pd.DataFrame.from_records(
+            petition_results_json["data"]["attributes"]["signatures_by_constituency"],
+            exclude=["name", "mp"],
+        )
+        .rename(columns={"ons_code": "ons_id"})
+    )
+
+    df = (
+        pd.read_csv(
+            ELECTION_RESULTS_PATH,
+            header=0,
+            usecols=[
+                "ONS ID",
+                "Constituency name",
+                "Region name",
+                "Party name",
+                "Votes",
+                "Share",
+            ],
+        )
+        .rename(columns={
+            "ONS ID": "ons_id",
+            "Constituency name": "constituency",
+            "Region name": "region",
+            "Party name": "party",
+            "Votes": "votes",
+            "Share": "share",
+        })
+        # Count Labour and Co-operative candidates as Labour.
+        .replace({"party": {"Labour and Co-operative": "Labour"}})
+        # Left merge: every election row is kept, with the constituency's signature count attached.
+        .merge(petition_results_df, on="ons_id", how="left")
+    )
+
+    # Total votes cast in each constituency, broadcast back onto every row of that constituency.
+    df = df.assign(
+        total_votes=lambda x: x.groupby("ons_id")["votes"].transform("sum"),
+        signatures_per_vote=lambda x: x["signature_count"] / x["total_votes"],
+    )
+
+    # Flag the single highest-vote row per constituency. idxmax yields a plain boolean
+    # column and exactly one winner per constituency, with no self-merge or NaN filling.
+    _winner_rows = df.groupby("ons_id")["votes"].idxmax()
+    df = df.assign(winner=df.index.isin(_winner_rows))
+
+    _total_signatures = petition_results_json["data"]["attributes"]["signature_count"]
+    # signature_count becomes float if the left merge leaves any constituency unmatched;
+    # cast to int so the `,d` format spec cannot raise.
+    _constituency_signatures = int(df.groupby("ons_id")["signature_count"].max().sum())
+
+    print(f"Total petition signatures: {_total_signatures:,d}")
+    print(f"Total petition signatures from per-constituency counts: {_constituency_signatures:,d}")
+    return (df,)
+
+
+# Bare `df` expression: shows the combined DataFrame as this cell's output.
+@app.cell
+def _(df):
+ df
+ return
+
+
+@app.cell
+def _(df, sns):
+    # Histogram of petition signatures, using one value per constituency
+    # (the count is repeated on every row sharing an ons_id).
+    _signatures = df.groupby("ons_id")["signature_count"].max()
+    ax = sns.histplot(_signatures)
+    ax.set_xlabel("Signatures")
+    ax.set_ylabel("Constituencies")
+    ax.set_title("Distribution of Petition Signatures by Constituency")
+    ax
+    return
+
+
+# Section heading for the per-party signature histograms in the next cell.
+@app.cell
+def _(mo):
+ mo.md(r"""### Signature Distribution""")
+ return
+
+
+@app.cell
+def _(TOP_PARTIES, df, sns):
+    # Signature-count histograms for winning rows, one panel for each of the
+    # first three entries of TOP_PARTIES.
+    _top_n = 3
+    _parties = TOP_PARTIES[:_top_n]
+    _won = df[df["winner"] & df["party"].isin(_parties)]
+    _g = sns.displot(
+        _won,
+        kind="hist",
+        x="signature_count",
+        hue="party",
+        hue_order=_parties,
+        col="party",
+        col_order=_parties,
+        height=3,
+        legend=False,
+    )
+    _g.set_titles("{col_name}")
+    _g.set_axis_labels("Signatures", "Constituencies")
+    _g.figure
+    return
+
+
+# Section heading for the signatures-per-vote histograms in the next cell.
+@app.cell
+def _(mo):
+ mo.md(r"""### Signatures per 2024 General Election Vote""")
+ return
+
+
+@app.cell
+def _(TOP_PARTIES, df, sns):
+    # Signatures-per-vote histograms for winning rows, one panel for each of
+    # the first three entries of TOP_PARTIES.
+    _top_n = 3
+    _shown = TOP_PARTIES[:_top_n]
+    _is_shown_winner = df["winner"] & df["party"].isin(_shown)
+    _g = sns.displot(
+        df[_is_shown_winner],
+        kind="hist",
+        x="signatures_per_vote",
+        hue="party",
+        hue_order=_shown,
+        col="party",
+        col_order=_shown,
+        height=3,
+        legend=False,
+    )
+    _g.set_titles("{col_name}")
+    _g.set_axis_labels("Signatures per Vote", "Constituencies")
+    _g.figure
+    return
+
+
+# Section heading for the votes-versus-signatures regression plots in the next cell.
+@app.cell
+def _(mo):
+ mo.md(r"""### Signatures vs. Votes""")
+ return
+
+
+@app.cell
+def _(TOP_PARTIES, df, sns):
+    # Regression plot of petition signatures against votes, one panel per party in TOP_PARTIES.
+    _rows = df[df["party"].isin(TOP_PARTIES)]
+    _facets = dict(
+        col="party",
+        col_wrap=4,
+        col_order=TOP_PARTIES,
+        hue="party",
+        hue_order=TOP_PARTIES,
+        height=3,
+    )
+    _g = sns.lmplot(_rows, x="votes", y="signature_count", **_facets)
+    _g.set_titles("{col_name}")
+    _g.set_axis_labels("2024 General Election Votes", "Petition Signatures")
+    _g.figure
+    return
+
+
+# Section heading for the vote-share-versus-signatures regression plots in the next cell.
+@app.cell
+def _(mo):
+ mo.md(r"""### Signatures vs. Vote Share""")
+ return
+
+
+@app.cell
+def _(TOP_PARTIES, df, sns):
+    # Regression plot of petition signatures against vote share, one panel per party in TOP_PARTIES.
+    _rows = df[df["party"].isin(TOP_PARTIES)]
+    _facets = dict(
+        col="party",
+        col_wrap=4,
+        col_order=TOP_PARTIES,
+        hue="party",
+        hue_order=TOP_PARTIES,
+        height=3,
+    )
+    _g = sns.lmplot(_rows, x="share", y="signature_count", **_facets)
+    _g.set_titles("{col_name}")
+    _g.set_axis_labels("2024 General Election Vote Share", "Petition Signatures")
+    _g.figure
+    return
+
+
+# Section heading for the vote-share-versus-signatures-per-vote regression plots in the next cell.
+@app.cell
+def _(mo):
+ mo.md(r"""### Signatures per Vote vs. Vote Share""")
+ return
+
+
+@app.cell
+def _(TOP_PARTIES, df, sns):
+    # Regression plot of signatures per vote against vote share, one panel per party in TOP_PARTIES.
+    _rows = df[df["party"].isin(TOP_PARTIES)]
+    _facets = dict(
+        col="party",
+        col_wrap=4,
+        col_order=TOP_PARTIES,
+        hue="party",
+        hue_order=TOP_PARTIES,
+        height=3,
+    )
+    _g = sns.lmplot(_rows, x="share", y="signatures_per_vote", **_facets)
+    _g.set_titles("{col_name}")
+    _g.set_axis_labels("2024 General Election Vote Share", "Petition Signatures per Vote")
+    _g.figure
+    return
+
+
+# Script entry point: call app.run() when this file is executed directly.
+if __name__ == "__main__":
+ app.run()