In Practice: Modeling Signaling Pathways

can be studied computationally in two complementary ways: mechanistic models (ODEs, Boolean networks) that simulate the dynamics of molecular interactions, and data-driven methods that infer activity from transcriptomic or proteomic measurements. Both approaches are used in practice, often together.

In this chapter we'll build a simple ODE model of the MAPK cascade, implement a Boolean network for the cycle G1/S transition, and compute activity scores from expression data.

Setup

bash

pip install numpy scipy matplotlib pandas requests

python

import numpy as np
from scipy.integrate import solve_ivp
import matplotlib.pyplot as plt
import pandas as pd

ODE Modeling: The MAPK Cascade

The MAPK/ERK cascade (RAS → RAF → MEK → ERK) can be modeled as a series of activation/deactivation reactions. We'll use a simplified Michaelis-Menten kinetics model.

Model Design

For each kinase, we track the fraction that is in the active (phosphorylated) state. Each activation follows Michaelis-Menten kinetics; deactivation is linear (phosphatase activity).

dRAF*/dt  = k1 * RAS * (1 - RAF*) / (Km1 + (1 - RAF*)) - k2 * RAF*
dMEK*/dt  = k3 * RAF* * (1 - MEK*) / (Km3 + (1 - MEK*)) - k4 * MEK*
dERK*/dt  = k5 * MEK* * (1 - ERK*) / (Km5 + (1 - ERK*)) - k6 * ERK*

Where:

X* = fraction of X in active state (0 to 1)
k_odd = rate constants for activation (phosphorylation by upstream kinase)
k_even = rate constants for deactivation (phosphatase)
Km = Michaelis constant for phosphorylation
RAS = input signal (fixed or time-varying)

python

def mapk_cascade(t, y, params, ras_signal):
    raf_star, mek_star, erk_star = y
    k1, k2, km1, k3, k4, km3, k5, k6, km5 = params

    ras = ras_signal(t)  # input signal as function of time

    # RAF activation/deactivation
    draf = k1 * ras * (1 - raf_star) / (km1 + (1 - raf_star)) - k2 * raf_star

    # MEK activation/deactivation
    dmek = k3 * raf_star * (1 - mek_star) / (km3 + (1 - mek_star)) - k4 * mek_star

    # ERK activation/deactivation
    derk = k5 * mek_star * (1 - erk_star) / (km5 + (1 - erk_star)) - k6 * erk_star

    return [draf, dmek, derk]

# Parameters: [k1, k2, km1, k3, k4, km3, k5, k6, km5]
params = [
    0.5, 0.1, 0.1,   # RAF activation/deactivation/Km
    0.5, 0.1, 0.1,   # MEK activation/deactivation/Km
    0.5, 0.1, 0.1,   # ERK activation/deactivation/Km
]

# Initial conditions: all inactive
y0 = [0.0, 0.0, 0.0]  # RAF*, MEK*, ERK*

# Time span
t_span = (0, 100)
t_eval = np.linspace(0, 100, 500)

Scenario 1: Step Input (Sustained Growth Factor)

python

# Constant RAS signal: growth factor applied at t=5
def ras_step(t):
    return 1.0 if t >= 5 else 0.0

sol_step = solve_ivp(
    mapk_cascade,
    t_span,
    y0,
    t_eval=t_eval,
    args=(params, ras_step),
    method="RK45",
    rtol=1e-6
)

plt.figure(figsize=(10, 5))
plt.plot(sol_step.t, sol_step.y[0], label="RAF* (active)", linewidth=2)
plt.plot(sol_step.t, sol_step.y[1], label="MEK* (active)", linewidth=2)
plt.plot(sol_step.t, sol_step.y[2], label="ERK* (active)", linewidth=2)
plt.axvline(x=5, color="gray", linestyle="--", alpha=0.5, label="Growth factor ON")
plt.xlabel("Time (min)")
plt.ylabel("Fraction active")
plt.title("MAPK Cascade — Step Input (Sustained GF)")
plt.legend()
plt.grid(alpha=0.3)
plt.tight_layout()
plt.savefig("mapk_step.png", dpi=150)
plt.show()

Scenario 2: Pulse Input (Transient Signal)

python

def ras_pulse(t):
    return 1.0 if 5 <= t <= 15 else 0.0

sol_pulse = solve_ivp(
    mapk_cascade, t_span, y0, t_eval=t_eval,
    args=(params, ras_pulse), method="RK45", rtol=1e-6
)

plt.figure(figsize=(10, 5))
plt.plot(sol_pulse.t, sol_pulse.y[2], label="ERK* (pulse input)", linewidth=2, color="red")
plt.plot(sol_step.t, sol_step.y[2], label="ERK* (step input)", linewidth=2, color="blue", linestyle="--")
plt.xlabel("Time (min)")
plt.ylabel("ERK* fraction active")
plt.title("ERK Response: Pulse vs. Step Input")
plt.legend()
plt.grid(alpha=0.3)
plt.tight_layout()
plt.savefig("erk_comparison.png", dpi=150)
plt.show()

Scenario 3: Simulating an Inhibitor

Simulate adding a MEK inhibitor (like trametinib) by reducing MEK activation rate:

python

def simulate_with_inhibitor(inhibitor_strength: float, params: list, label: str):
    """inhibitor_strength: 0=no effect, 1=complete inhibition"""
    params_mod = params.copy()
    params_mod[3] *= (1 - inhibitor_strength)  # reduce k3 (MEK activation)

    sol = solve_ivp(
        mapk_cascade, t_span, y0, t_eval=t_eval,
        args=(params_mod, ras_step), method="RK45", rtol=1e-6
    )
    return sol

fig, axes = plt.subplots(1, 2, figsize=(14, 5))

for strength, label in [(0.0, "No inhibitor"), (0.5, "50% MEK inhibition"), (0.9, "90% MEK inhibition")]:
    sol = simulate_with_inhibitor(strength, params, label)
    axes[0].plot(sol.t, sol.y[1], label=f"MEK* ({label})")
    axes[1].plot(sol.t, sol.y[2], label=f"ERK* ({label})")

for ax, title in zip(axes, ["MEK Activity", "ERK Activity"]):
    ax.axvline(x=5, color="gray", linestyle="--", alpha=0.5)
    ax.set_xlabel("Time (min)")
    ax.set_ylabel("Fraction active")
    ax.set_title(title)
    ax.legend(fontsize=8)
    ax.grid(alpha=0.3)

plt.suptitle("MEK Inhibitor Dose Response")
plt.tight_layout()
plt.savefig("mek_inhibitor.png", dpi=150)
plt.show()

Boolean Network: G1/S Cell Cycle Transition

For larger networks where kinetic parameters are unknown, Boolean models capture the logical structure:

python

class BooleanNetwork:
    def __init__(self, update_rules: dict, initial_state: dict):
        self.rules = update_rules
        self.state = initial_state.copy()
        self.history = [initial_state.copy()]

    def step(self):
        new_state = {}
        for node, rule in self.rules.items():
            new_state[node] = rule(self.state)
        self.state = new_state
        self.history.append(new_state.copy())

    def run(self, n_steps: int):
        for _ in range(n_steps):
            self.step()

    def get_attractor(self) -> tuple:
        """Check if the network has reached a steady state."""
        if len(self.history) < 2:
            return None
        current = frozenset(self.state.items())
        for i, past_state in enumerate(self.history[:-1]):
            if frozenset(past_state.items()) == current:
                return i  # cycle detected, period = current_idx - i
        return None


# G1/S transition Boolean model
# Nodes: GF (growth factor), CycD, Rb, E2F, CycE, S_phase
# Based on simplified Fauré et al. 2006

def make_g1s_network(gf_active: bool = True):
    initial_state = {
        "GF": gf_active,       # growth factor signal
        "CycD": False,         # Cyclin D
        "Rb": True,            # RB protein (active = growth suppression)
        "E2F": False,          # E2F transcription factor (active = S-phase genes)
        "CycE": False,         # Cyclin E
        "S_phase": False,      # S-phase entry
    }

    update_rules = {
        "GF": lambda s: s["GF"],   # externally set
        "CycD": lambda s: s["GF"],  # CycD is driven by GF
        "Rb": lambda s: not s["CycD"] and not s["CycE"],  # inhibited by CycD and CycE
        "E2F": lambda s: not s["Rb"],   # activated when Rb is inactivated
        "CycE": lambda s: s["E2F"],     # activated by E2F
        "S_phase": lambda s: s["E2F"] and s["CycE"],  # requires both
    }

    return BooleanNetwork(update_rules, initial_state)

# Run with GF present
net_gf = make_g1s_network(gf_active=True)
net_gf.run(10)

# Run without GF
net_no_gf = make_g1s_network(gf_active=False)
net_no_gf.run(10)

# Visualize
def plot_boolean_trajectory(history: list, title: str, ax):
    nodes = list(history[0].keys())
    times = range(len(history))

    for i, node in enumerate(nodes):
        values = [h[node] for h in history]
        ax.step(times, [v + i * 1.5 for v in values], where="post", linewidth=2)
        ax.text(-0.5, i * 1.5 + 0.5, node, ha="right", fontsize=9)

    ax.set_xlim(-1, len(history))
    ax.set_xlabel("Time steps")
    ax.set_yticks([])
    ax.set_title(title)
    ax.grid(axis="x", alpha=0.3)

fig, axes = plt.subplots(1, 2, figsize=(14, 6))
plot_boolean_trajectory(net_gf.history, "With Growth Factor", axes[0])
plot_boolean_trajectory(net_no_gf.history, "Without Growth Factor", axes[1])
plt.tight_layout()
plt.savefig("boolean_cell_cycle.png", dpi=150)
plt.show()

Pathway Activity Scoring from Expression Data

Given a matrix, compute activity scores by summing the expression of member .

Method 1: Simple Z-score Aggregation

python

import numpy as np

def pathway_score_zscore(
    expression_matrix: np.ndarray,  # genes × samples
    gene_names: list,
    pathway_genes: list
) -> np.ndarray:
    """Score pathway activity by mean z-score of member genes."""
    # Find pathway gene indices
    gene_index = {g: i for i, g in enumerate(gene_names)}
    indices = [gene_index[g] for g in pathway_genes if g in gene_index]

    if not indices:
        return np.zeros(expression_matrix.shape[1])

    # Z-score normalize per gene
    mean = expression_matrix.mean(axis=1, keepdims=True)
    std = expression_matrix.std(axis=1, keepdims=True) + 1e-8
    z_scores = (expression_matrix - mean) / std

    # Mean z-score of pathway genes per sample
    return z_scores[indices, :].mean(axis=0)

# Example with simulated data
np.random.seed(42)
n_genes, n_samples = 1000, 50
gene_names = [f"GENE_{i}" for i in range(n_genes)]

# Simulate: MAPK pathway genes upregulated in first 25 samples
expression = np.random.randn(n_genes, n_samples)
mapk_pathway_indices = [10, 25, 50, 100, 200]  # "MAPK pathway genes"
mapk_genes = [gene_names[i] for i in mapk_pathway_indices]

# Add signal to first 25 samples
expression[mapk_pathway_indices, :25] += 2.0

scores = pathway_score_zscore(expression, gene_names, mapk_genes)
print(f"Samples 1-25 (MAPK active): mean score = {scores[:25].mean():.2f}")
print(f"Samples 26-50 (MAPK inactive): mean score = {scores[25:].mean():.2f}")

Method 2: GSEA-style Enrichment Score

python

def gsea_enrichment_score(
    gene_scores: dict,   # gene → score (e.g., fold change)
    gene_set: set,       # genes in the pathway
) -> float:
    """
    Compute a simplified GSEA enrichment score.
    Ranks genes by score, then computes running sum.
    """
    # Sort genes by score (descending)
    sorted_genes = sorted(gene_scores.keys(), key=lambda g: -gene_scores[g])
    n_total = len(sorted_genes)
    n_set = sum(1 for g in sorted_genes if g in gene_set)

    if n_set == 0:
        return 0.0

    # Compute running enrichment score
    p_hit = 1.0 / n_set
    p_miss = 1.0 / (n_total - n_set)

    running_sum = 0.0
    max_deviation = 0.0

    for gene in sorted_genes:
        if gene in gene_set:
            running_sum += p_hit
        else:
            running_sum -= p_miss
        if abs(running_sum) > abs(max_deviation):
            max_deviation = running_sum

    return max_deviation


# Example: test MAPK gene set enrichment
gene_fold_changes = {
    f"GENE_{i}": np.random.randn() for i in range(1000)
}
# Artificially upregulate MAPK genes
for i in mapk_pathway_indices:
    gene_fold_changes[f"GENE_{i}"] = 3.0 + np.random.randn() * 0.5

es = gsea_enrichment_score(gene_fold_changes, set(mapk_genes))
print(f"MAPK pathway enrichment score: {es:.3f}")

Using KEGG Pathway Data

python

import requests

def get_kegg_pathway_genes(pathway_id: str = "hsa04010") -> dict:
    """Fetch gene list for a KEGG pathway. hsa04010 = MAPK pathway (human)."""
    url = f"https://rest.kegg.jp/get/{pathway_id}"
    response = requests.get(url)
    response.raise_for_status()

    genes = {}
    in_gene_section = False

    for line in response.text.split("\n"):
        if line.startswith("GENE"):
            in_gene_section = True
            line = line[12:]  # skip "GENE        "
        elif line.startswith("COMPOUND") or line.startswith("REFERENCE"):
            in_gene_section = False

        if in_gene_section and line.strip():
            parts = line.strip().split(";")
            if len(parts) >= 1:
                gene_part = parts[0].strip().split()
                if len(gene_part) >= 2:
                    gene_id = gene_part[0]
                    gene_symbol = gene_part[1]
                    genes[gene_id] = gene_symbol

    return genes

# Fetch MAPK pathway
try:
    mapk_genes_kegg = get_kegg_pathway_genes("hsa04010")
    print(f"MAPK pathway has {len(mapk_genes_kegg)} genes")
    print("Sample genes:", list(mapk_genes_kegg.values())[:10])
except Exception as e:
    print(f"KEGG fetch failed: {e}")
    # Fallback: hardcoded core MAPK genes
    mapk_genes_kegg = {
        "5894": "RAF1", "673": "BRAF", "5604": "MAP2K1",
        "5605": "MAP2K2", "5594": "MAPK1", "5595": "MAPK3",
        "3265": "HRAS", "4893": "NRAS", "3845": "KRAS"
    }

Complete Pipeline: From Expression to Pathway Activity

python

def compute_pathway_activities(
    expression_df: pd.DataFrame,  # rows=genes, cols=samples
    pathways: dict,                # pathway_name → [gene_list]
) -> pd.DataFrame:
    """
    Compute pathway activity scores for all samples across multiple pathways.
    Returns DataFrame with rows=samples, cols=pathways.
    """
    activities = {}
    gene_names = list(expression_df.index)

    for pathway_name, pathway_genes in pathways.items():
        scores = pathway_score_zscore(
            expression_df.values,
            gene_names,
            pathway_genes
        )
        activities[pathway_name] = scores

    return pd.DataFrame(activities, index=expression_df.columns)


# Simulated example
np.random.seed(42)
n_genes, n_samples = 2000, 60
sample_names = [f"tumor_{i}" for i in range(30)] + [f"normal_{i}" for i in range(30)]
gene_symbols = [f"GENE_{i}" for i in range(n_genes)]

expr_data = np.abs(np.random.randn(n_genes, n_samples))

# Simulate pathway activation in tumor samples
pathway_definitions = {
    "MAPK_pathway": [f"GENE_{i}" for i in range(50, 60)],
    "PI3K_pathway": [f"GENE_{i}" for i in range(100, 110)],
    "Apoptosis":    [f"GENE_{i}" for i in range(200, 210)],
}

# Activate MAPK in tumors, suppress apoptosis
for gene in pathway_definitions["MAPK_pathway"]:
    idx = gene_symbols.index(gene)
    expr_data[idx, :30] *= 3.0

for gene in pathway_definitions["Apoptosis"]:
    idx = gene_symbols.index(gene)
    expr_data[idx, :30] *= 0.3

expr_df = pd.DataFrame(expr_data, index=gene_symbols, columns=sample_names)

activity_df = compute_pathway_activities(expr_df, pathway_definitions)
print("Pathway activity scores (first 5 samples of each group):")
print("\nTumor samples:")
print(activity_df.loc[[f"tumor_{i}" for i in range(5)]].round(2))
print("\nNormal samples:")
print(activity_df.loc[[f"normal_{i}" for i in range(5)]].round(2))

# Visualize with heatmap
import matplotlib.pyplot as plt
fig, ax = plt.subplots(figsize=(10, 6))
im = ax.imshow(activity_df.T.values, aspect="auto", cmap="RdBu_r")
ax.set_xticks(range(0, n_samples, 5))
ax.set_xticklabels([sample_names[i] for i in range(0, n_samples, 5)], rotation=45, ha="right", fontsize=7)
ax.set_yticks(range(len(pathway_definitions)))
ax.set_yticklabels(list(pathway_definitions.keys()))
ax.set_title("Pathway Activity Scores: Tumor vs. Normal")
plt.colorbar(im, ax=ax, label="Activity z-score")
plt.tight_layout()
plt.savefig("pathway_activity_heatmap.png", dpi=150)
plt.show()

Summary

Approach	When to use	Tools/Libraries
ODE models	When kinetic parameters are available; studying dynamics	scipy.integrate
Boolean networks	Large networks without quantitative params; logic structure	Custom (networkx for visualization)
Pathway scoring	Analyzing existing expression datasets	numpy, pandas
GSEA-style enrichment	Testing whether a gene set is enriched in differential data	fgsea (R), GSEA Java app
KEGG/Reactome API	Fetching curated pathway gene lists	requests + KEGG REST API

Mechanistic modeling is most useful for generating testable hypotheses about dynamics. Data-driven scoring is most useful for interpreting existing datasets. In practice, you use both: models guide your biological interpretation of the data, and the data validates or challenges the models.