Estimating growth rates#

Outline approach used in the app to estimate the growth rate of biological growth curves with lag phase using growthcurves package

Here we compare different packages, explain some differences between parameters using a simulated logistic growth curve with noise.

parametric model based fitting of noisy data
linear regression on non log-transformed data
linear regression on log-transformed data
spline fitting and derivatives
croissance exponential model fitting
growthcurves package fitting, comparing all methods

The \(\mu_{max}\) growth is the maximum of the second derivative or the maximum of the log transformed data slope (if estimated with a sliding window).

Setup#

Imports

Show code cell source

Hide code cell source

class GrowthParams(NamedTuple):
    a: float
    r: float
    t0: float
    shift_y: float

    def __repr__(self):
        return (
            f"GrowthParams(a={self.a:.3f}, r={self.r:.3f},"
            f" t0={self.t0:.3f}, shift_y={self.shift_y:.3f})"
        )


def generate_growth_curve(
    time_points: np.array,
    lag_duration: float = 2.0,
    growth_rate: float = 0.5,
    max_population: float = 1.0,
    shift: float = 0.0,
    noise_level: float = 0.02,
    random_seed: int = None,
    non_negative: bool = True,
    log_transform: bool = False,
    epsilon: float = 0.0001,
):
    """
    Generate biological growth curve with lag phase using modified logistic equation.

    Standard logistic: N(t) = K / (1 + ((K-N₀)/N₀) * exp(-r*t))
    With lag phase: Growth starts at effective time t_eff = t - lag_duration

    Parameters:
    -----------
    time_points : array-like
        Time points at which to evaluate the growth curve
    lag_duration : float
        Duration of the lag phase (same units as time_points)
    growth_rate : float
        Intrinsic/maximum specific growth rate r (1/time)
    max_population : float
        Carrying capacity K / maximum population size
    shift : float
        Vertical shift (e.g., for background OD)
    noise_level : float
        Standard deviation of Gaussian noise
    random_seed : int, optional
        Random seed for reproducibility
    non_negative : bool
        Whether to enforce non-negative population values
    log_transform : bool
        Whether to apply natural log transform to the output
    epsilon : float
        Small offset added before log to avoid log(0)

    Returns:
    --------
    population : numpy array
        Population values with noise at each time point
    population_clean : numpy array
        Population values without noise (for comparison)
    """
    if random_seed is not None:
        np.random.seed(random_seed)

    time_points = np.array(time_points)

    # Logistic growth equation with lag phase
    # During lag: keep population near initial with minimal growth
    # After lag: apply standard logistic starting from t_eff = 0
    adjusted_time = time_points - lag_duration
    population_clean = max_population / (1 + np.exp(-growth_rate * adjusted_time))

    # Add vertical shift (e.g., background OD reading)
    if shift > 0.0:
        population_clean = population_clean + shift

    # Add Gaussian noise
    noise = np.random.normal(
        0,
        noise_level * max_population,
        size=len(time_points),
    )
    population = population_clean + noise

    # Ensure non-negative
    if non_negative:
        population = np.maximum(population, epsilon)

    # Optional log transform (for exponential phase detection)
    if log_transform:
        population = np.log(population + epsilon)
        population_clean = np.log(population_clean + epsilon)

    return population, population_clean


def plot_simulated_growth_curve(
    time: Iterable[float],
    pop_clean,
    pop_noisy,
):
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(time, pop_clean, "b-", linewidth=2, label="Clean signal")
    ax.plot(time, pop_noisy, "r.", alpha=0.5, label="With noise")
    ax.set_xlabel("Time (hours)")
    ax.set_ylabel("Population (OD or cells/mL)")
    ax.set_title("Biological Growth Curve with Lag Phase")
    ax.legend()
    ax.grid(True, alpha=0.3)
    fig.tight_layout()
    return fig, ax


def smooth(y: np.ndarray[float], window: int = 21, poly: int = 1, passes: int = 1):
    """Smooth a series with Savitzky-Golay filtering (odd window, multi-pass)."""
    n = y.size
    if n < 7:
        return y
    w = int(window) | 1  # odd
    w = min(w, n if n % 2 else n - 1)
    p = min(int(poly), w - 1)
    for _ in range(int(passes)):
        y = savgol_filter(y, w, p, mode="interp")
    return y


def timeindex_to_hours(index: pd.DatetimeIndex):
    """
    Convert a pandas DatetimeIndex to elapsed hours since the first timestamp.

    Parameters
    ----------
    index : pd.DatetimeIndex
        The DatetimeIndex to convert.

    Returns
    -------
    numpy.ndarray
        Array of elapsed hours (float) since the first timestamp in the index,
        reshaped as a column vector.
    """
    (index - index[0]).total_seconds().to_numpy() / 3600
    return


def fit_linear_growth(s: pd.Series):
    """
    Fit a linear regression model to a time series window.

    Parameters
    ----------
    series_window : pd.Series
        Time-indexed series of population measurements.

    Returns
    -------
    linreg : LinearRegression
        Fitted linear regression model.
    """
    X = (s.index - s.index[0]).total_seconds().to_numpy().reshape(-1, 1) / 3600  # hours
    y = s.values.reshape(-1, 1)
    linreg = LinearRegression().fit(X, y)
    return linreg


def get_slope(s: pd.Series):
    linreg_fitted = fit_linear_growth(s)
    return linreg_fitted.coef_[0][0]


def get_tangent(s, max_idx, max_slope, window_hours=1):
    """
    Compute the tangent line around the maximum slope point in log2-transformed data.

    Parameters
    ----------
    s : pd.Series
        Data with datetime index.
    max_idx : pd.Timestamp
        Index of the maximum slope point.
    s_log2 : pd.DataFrame or pd.Series
        Log2-transformed smoothed data.
    max_slope : float
        Maximum slope value.
    window_hours : float, optional
        Window size in hours around max_idx to show the tangent (default: 1).

    Returns
    -------
    tangent : np.ndarray
        Array with tangent values (NaN outside the window).
    """
    x = (s.index - s.index[0]).total_seconds() / 3600  # hours
    x0 = (max_idx - s.index[0]).total_seconds() / 3600
    y0 = (
        s.loc[max_idx].values[0]
        if hasattr(s.loc[max_idx], "values")
        else s.loc[max_idx]
    )

    # Mask for ±window_hours around max_idx
    mask = (x >= x0 - window_hours) & (x <= x0 + window_hours)
    tangent = np.full_like(x, np.nan, dtype=float)
    tangent[mask] = max_slope * (x[mask] - x0) + y0
    return tangent

Model equations

use non-ODE formulation in comparison to the growthcurves package

S-Shaped growth curve (Logistic)#

Generate time points:

2,880 would we every 30seconds for a day
17,280 would be every 5seconds for a day

Plot the model with the parameters specified above

fig, ax = plot_simulated_growth_curve(time_in_h, model_curve, pop_noisy)

Curve fitting on noisy data#

Fitted parameters with noisy data

params_fitted, pcov = curve_fit(
    logistic_model, time_in_h, pop_noisy, p0=[1.0, 0.5, 3.0, 0.0]
)
GrowthParams(*params_fitted)

GrowthParams(a=1.502, r=0.598, t0=9.978, shift_y=0.199)

Fitted parameters with smoothed data (removing some noise)

difference is here minimal with low and homoscedastic noise

params_fitted, pcov = curve_fit(
    logistic_model, time_in_h, smooth(pop_noisy, window=31), p0=[1.0, 0.5, 3.0, 0.0]
)
GrowthParams(*params_fitted)

GrowthParams(a=1.502, r=0.598, t0=9.978, shift_y=0.199)

Compare different growth models#

Fitting the noisy data from a logistic model with three models

Logistic fitted parameters: 1.502, 0.598, 9.978, 0.199
Gompertz fitted parameters: 1.792, 0.273, 7.936
Richards fitted parameters: 1.689, 0.943, 11.015, 4.100

/tmp/ipykernel_768/2318378229.py:30: RuntimeWarning: invalid value encountered in power
  return a / ((1 + nu * np.exp(-r * (t - lag))) ** power)

A huge condition number would indicate a problem with parameter identifiability

huge >>> 1e12

np.linalg.cond(pcov)

np.float64(8794.927476730772)

Prepare data for PioReactor-like analysis#

create datetime index
apply rolling median smoothing (could be Savitzky-Golay filtering)

Convert hours (range of 24h) to timedelta and move to start data

start_datetime = pd.Timestamp("2025-11-21 08:00")
time_series = pd.Series(pd.to_timedelta(time_in_h, unit="h")) + start_datetime
time_series = pd.Series(time_series, dtype="datetime64[s]")
time_series

    2025-11-21 08:00:00
    2025-11-21 08:00:30
    2025-11-21 08:01:00
    2025-11-21 08:01:30
    2025-11-21 08:02:00
               ...        
 2025-11-22 07:57:59
 2025-11-22 07:58:29
 2025-11-22 07:58:59
 2025-11-22 07:59:29
 2025-11-22 08:00:00
Length: 2880, dtype: datetime64[s]

# View data in format imported from PioReactor software output
# - use the rounded timestamp as index which is created as a preprocessing step

df = pd.DataFrame(
    {
        "timestamp_rounded": time_series,
        "Reactor": pop_noisy,
    }
).set_index("timestamp_rounded")
df

	Reactor
timestamp_rounded
2025-11-21 08:00:00	0.226061
2025-11-21 08:00:30	0.197506
2025-11-21 08:01:00	0.232892
2025-11-21 08:01:30	0.272301
2025-11-21 08:02:00	0.193247
...	...
2025-11-22 07:57:59	1.658720
2025-11-22 07:58:29	1.657515
2025-11-22 07:58:59	1.669609
2025-11-22 07:59:29	1.712810
2025-11-22 08:00:00	1.691233

2880 rows × 1 columns

Apply rolling median to smooth the data

31 consecutives timepoints are used, setting the 16th value as the smoothed value

rolling_window = 31  # in number of samples
min_periods = 15
df_rolling = df.rolling(
    rolling_window,
    min_periods=min_periods,
    center=True,
).median()
df_rolling

	Reactor
timestamp_rounded
2025-11-21 08:00:00	0.195386
2025-11-21 08:00:30	0.193266
2025-11-21 08:01:00	0.195386
2025-11-21 08:01:30	0.193266
2025-11-21 08:02:00	0.193257
...	...
2025-11-22 07:57:59	1.680756
2025-11-22 07:58:29	1.685787
2025-11-22 07:58:59	1.679566
2025-11-22 07:59:29	1.685787
2025-11-22 08:00:00	1.679566

2880 rows × 1 columns

Plot median smoothed data against Savitzky-Golay smoothed data

what happens in the case of outliers? (to be tested)
are results comparable?

For now we will continue with the rolling median smoothed data

ax = df_rolling.plot(figsize=(7.4, 4), legend=False)
_ = ax.legend(["Smoothed data: rolling median"])
pd.Series(smooth(df["Reactor"], window=31, passes=1), index=df.index).plot(ax=ax)
_ = ax.legend(["Smoothed data: rolling median", "Smoothed data: Savitzky-Golay"])

Fit a linear model on the data#

rolling window
maximum overall growth: maximum of first derivative
\(\mu_{max}\) growth is the maximum of the second derivative, or the maximum of the log transformed data slope (estimated with a sliding window)

series_window = df_rolling.squeeze().loc["2025-11-21 15:00":"2025-11-21 22:00"]

(Non-) transformed data#

/tmp/ipykernel_768/3664703078.py:19: UserWarning: This axis already has a converter set and is updating to a potentially incompatible converter
  ax.plot(df_rolling.index, tangent, "--", label="Tangent at max slope (±1h) (log2)")

maximum timepoint estimation with sliding window is not perfect. small errors can lead to deviations.

print(f"shift in x of logistic funtion: {lag_duration}")
max_idx - df_rolling.index[0]

shift in x of logistic funtion: 10

Timedelta('0 days 07:09:38')

center_time = df_rolling.index[0] + pd.Timedelta(hours=lag_duration)
print(f"center of logistic curves should be: {center_time}")
slopes_log2.squeeze().nlargest(5)

center of logistic curves should be: 2025-11-21 18:00:00

timestamp_rounded
2025-11-21 15:09:38    0.462154
2025-11-21 15:09:08    0.461847
2025-11-21 15:10:08    0.461843
2025-11-21 15:10:38    0.461357
2025-11-21 15:08:38    0.461355
Name: Reactor, dtype: float64

slopes_normal = s_normal.rolling(
    rolling_window + 100,
    min_periods=min_periods,
    center=True,
).apply(get_slope)
slopes_normal.nlargest(5)

timestamp_rounded
2025-11-21 18:21:42    0.233420
2025-11-21 18:21:12    0.233351
2025-11-21 18:22:12    0.233351
2025-11-21 18:20:42    0.233131
2025-11-21 18:22:42    0.233128
Name: Reactor, dtype: float64

<Figure size 800x500 with 1 Axes>

So, using the original (non-transformed) data, one can recover \(r\) from the slope if the carrying capacity \(K\) and the current value \(N(t)\) (or at least that the point is at the inflection, \(N = K/2\)) are known.

Growth rate in model:  0.600
Non-transformed slope:  0.233
Value of curve at max slope: 0.844
Non-transformed est. r with est. K:  0.553
Non-transformed est. r with true K:  0.622
Log2-transformed slope: 0.462
Log2-transformed est. r: 0.641

Median slope around center point (plus-minus 10mins)
non-transformed slope at center: 0.219
log-transformed slope at center: 0.328
Log2-transformed est. r: 0.455

Use spline to find the maxium growth#

just to highlight this approach

Fit spline to the smoothed data and calculate derivatives

use smoothing factor based on data length

spline_smoothing_value = get_smoothing_range(df_rolling.shape[0])
spline_smoothing_value

SmoothingRange(s_min=2804, s=2880, s_max=2955)

Identify time points where growth rate is in the top 90%

could be used to estimat the growth rate during maximum growth phase

Show code cell source

Hide code cell source

from pandas.api.types import is_datetime64_any_dtype
from scipy.interpolate import make_splrep, splev


def fit_spline_and_derivatives(
    s: pd.Series,
    smoothing_factor: float = 1000.0,
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Fit B-splines to each column in the DataFrame and compute specified derivatives.
    Values cannot be missing as NaNs, i.e. on rolling median of data.

    Parameters
    ----------
    s: pd.Series
        Input Series with time series data
    smoothing_factor: float
        Smoothing factor for the spline fitting.
    Returns:
        dict[str, pd.DataFrame]: Dictionary containing the fitted spline
                                 and its derivatives.
    """
    # drop NaN values
    s = s.dropna()

    if len(s) < 4:
        raise ValueError(
            "Not enough data points to fit a spline. Need at least 4 non-NaN values."
        )
    if not is_datetime64_any_dtype(s.index.dtype):
        raise TypeError("Index of the input Series must be datetime type.")
    x = (s.index - s.index[0]).total_seconds().to_numpy() / 3_600  # convert to hours

    bspl = make_splrep(
        x,
        s,
        s=smoothing_factor,
        k=3,
    )
    s_fitted = pd.Series(
        splev(x, bspl),
        index=s.index,
    )

    # for order in derivative_ord_ers:
    der = bspl.derivative(nu=1)
    s_first_derivative = pd.Series(
        der(x),
        index=s.index,
    )

    der2 = bspl.derivative(nu=2)
    s_second_derivative = pd.Series(
        der2(x),
        index=s.index,
    )

    return s_fitted, s_first_derivative, s_second_derivative


def plot_fitted_data(
    splines: pd.Series,
    der1: pd.Series,
    der2: pd.Series,
):
    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 12))
    splines.plot(ax=ax, title="Fitted Spline", color="blue")
    ax2 = ax.twinx()
    der1.plot(ax=ax2, color="green", label="First Derivative")
    der2.plot(ax=ax2, color="orange", label="Second Derivative")
    # ax2.set_ylabel("Derivatives")
    # der1.plot(ax=ax2, title="First Derivative", color="green")
    # der2.plot(ax=ax2, title="Second Derivative", color="orange")
    ax.legend(["Fitted Spline"])
    ax2.legend(["First Derivative", "Second Derivative"])
    fig.tight_layout()
    return fig, ax


splines, der1, der2 = fit_spline_and_derivatives(
    df_rolling.squeeze(), smoothing_factor=5.0
)
fig, ax = plot_fitted_data(splines, der1, der2)

inflection_points = df_rolling.iloc[[der2.argmax(), der2.argmin()]]

max_min_growth_change = [der2.argmax(), der2.argmin()]
der1.iloc[max_min_growth_change]

timestamp_rounded
2025-11-21 14:00:07    0.113465
2025-11-21 20:00:15    0.139471
dtype: float64

inflection_points.index - df_rolling.index.min()

TimedeltaIndex(['0 days 06:00:07', '0 days 12:00:15'], dtype='timedelta64[s]', name='timestamp_rounded', freq=None)

	Reactor
count	2880.000000
mean	0.053146
std	0.080244
min	-0.169281
25%	0.002053
50%	0.077216
75%	0.122327
max	0.137387

t_to_max_in_h = (derivatives.idxmax() - derivatives.index.min()).dt.seconds / 3_600
t_to_max_in_h

Reactor    10.453611
dtype: float64

lag-phase duration should be estimated (maybe from first derivative plot?)
max population can be measured using OD
min population can be measured using OD

use od_max from df_rolling

Reactor    0.851437
dtype: float64

Find time range of maximum derivative and plot the region

def add_region_high_growth(ax, time_range, use_elapsed_time=False):
    """Add shaded region to indicate high growth phase."""
    start, end = time_range["start"], time_range["end"]
    if pd.isna(start) or pd.isna(end):
        return
    if use_elapsed_time:
        start = (start - df_rolling.index[0]).total_seconds() / 3600
        end = (end - df_rolling.index[0]).total_seconds() / 3600
    ax.axvspan(
        start,
        end,
        color="gray",
        alpha=0.3,
        label="High growth phase",
    )
    return ax


in_high_growth = derivatives.ge(cutoffs, axis=1)
max_range = find_max_range(in_high_growth["Reactor"])
fig, ax = plot_simulated_growth_curve(time_in_h, model_curve, pop_noisy)
_ = add_region_high_growth(ax, max_range, use_elapsed_time=True)

Without lag phase#

Now calculate exponential growth in the maximum growth area:

view = df_rolling.loc[max_range.start : max_range.end]
np.log(view).plot()

<Axes: xlabel='timestamp_rounded'>

Growth rate (slope): 0.2055 per hour
Intercept: -0.4008

<matplotlib.legend.Legend at 0x70ca13978410>

Croissance estimate#

Using croissance package to estimate growth parameters from biosustain: croissance

s_normal_in_h = (
    pd.Series(pop_noisy, time_in_h).rolling(31, min_periods=15, center=True).median()
)
s_normal_in_h

000000     0.195386
008336     0.193266
016672     0.195386
025009     0.193266
033345     0.193257
               ...   
966655    1.680756
974991    1.685787
983328    1.679566
991664    1.685787
000000    1.679566
Length: 2880, dtype: float64

ret = croissance.process_curve(s_normal_in_h)
gp = ret.growth_phases[0]
gp.slope, gp.intercept

/home/docs/checkouts/readthedocs.org/user_builds/piogrowth/envs/stable/lib/python3.13/site-packages/croissance/estimation/smoothing/segments.py:20: FutureWarning: The behavior of obj[i:j] with a float-dtype index is deprecated. In a future version, this will be treated as positional instead of label-based. For label-based slicing, use obj.loc[i:j] instead
  window = series[i : i + size * increment]
/home/docs/checkouts/readthedocs.org/user_builds/piogrowth/envs/stable/lib/python3.13/site-packages/croissance/estimation/smoothing/segments.py:61: FutureWarning: The behavior of obj[i:j] with a float-dtype index is deprecated. In a future version, this will be treated as positional instead of label-based. For label-based slicing, use obj.loc[i:j] instead
  window = series[start:end]

(np.float64(0.38623235823792945), np.float64(10.485852595456521))

fig, axes = croissance.figures.plot.plot_processed_curve(ret, yscale="both")

Growthcurves package#

Use the [growthcurves](https://pypi.org/project/growthcurves/ package to fit the logistic model and estimate parameters

_ = s_normal_in_h.plot(
    figsize=(8, 5), title="Smoothed growth curve for growthcurves pkg"
)

	max_od	N0	mu_max	intrinsic_growth_rate	doubling_time	exp_phase_start	exp_phase_end	time_at_umax	od_at_umax	fit_t_min	fit_t_max	fit_method	model_rmse	model_name
mech_logistic	1.700895	0.19915	0.29292	0.596981	2.366335	2.39456	13.977475	8.176353	0.580439	0.0	24.0	model_fitting_mech_logistic	0.009675	mech_logistic
mech_gompertz	1.803414	0.158867	0.321205	0.273271	2.157957	2.128178	14.497329	5.723447	0.346614	0.0	24.0	model_fitting_mech_gompertz	0.067032	mech_gompertz
mech_richards	1.701699	0.192303	0.285704	0.54994	2.426102	1.996298	14.03032	8.176353	0.583992	0.0	24.0	model_fitting_mech_richards	0.01031	mech_richards
mech_baranyi	1.700947	0.192342	0.292886	0.596198	2.366612	0.995168	13.97873	8.176353	0.580576	0.0	24.0	model_fitting_mech_baranyi	0.00966	mech_baranyi
phenom_logistic	1.703093	0.200115	0.299711	None	2.312721	4.635667	11.780248	8.224449	0.586686	0.0	24.0	model_fitting_phenom_logistic	0.010118	phenom_logistic
phenom_gompertz	1.71383	0.229502	0.345445	None	2.006532	5.458385	11.278627	7.599198	0.480797	0.0	24.0	model_fitting_phenom_gompertz	0.019375	phenom_gompertz
phenom_gompertz_modified	1.706141	0.22735	0.334868	None	2.06991	5.171202	11.320117	7.503006	0.475212	0.0	24.0	model_fitting_phenom_gompertz_modified	0.016237	phenom_gompertz_modified
phenom_richards	1.701812	0.193621	0.295234	None	2.347786	4.471266	11.833378	8.272545	0.594766	0.0	24.0	model_fitting_phenom_richards	0.009952	phenom_richards
spline	1.721717	0.193828	0.453677	None	1.527844	5.252174	10.196329	7.236181	0.449496	0.0	24.0	model_fitting_spline	0.012405	spline
sliding_window	1.721717	0.193828	0.303536	None	2.283572	4.31411	11.703825	7.365057	0.461325	6.744008	7.986106	model_fitting_sliding_window	0.022418	sliding_window

growth_stats[
    [
        "time_at_umax",
        "od_at_umax",
        "mu_max",
        "intrinsic_growth_rate",
        "doubling_time",
        "exp_phase_start",
        "exp_phase_end",
        "model_name",
        "max_od",
        "N0",
    ]
]

	time_at_umax	od_at_umax	mu_max	intrinsic_growth_rate	doubling_time	exp_phase_start	exp_phase_end	model_name	max_od	N0
mech_logistic	8.176353	0.580439	0.29292	0.596981	2.366335	2.39456	13.977475	mech_logistic	1.700895	0.19915
mech_gompertz	5.723447	0.346614	0.321205	0.273271	2.157957	2.128178	14.497329	mech_gompertz	1.803414	0.158867
mech_richards	8.176353	0.583992	0.285704	0.54994	2.426102	1.996298	14.03032	mech_richards	1.701699	0.192303
mech_baranyi	8.176353	0.580576	0.292886	0.596198	2.366612	0.995168	13.97873	mech_baranyi	1.700947	0.192342
phenom_logistic	8.224449	0.586686	0.299711	None	2.312721	4.635667	11.780248	phenom_logistic	1.703093	0.200115
phenom_gompertz	7.599198	0.480797	0.345445	None	2.006532	5.458385	11.278627	phenom_gompertz	1.71383	0.229502
phenom_gompertz_modified	7.503006	0.475212	0.334868	None	2.06991	5.171202	11.320117	phenom_gompertz_modified	1.706141	0.22735
phenom_richards	8.272545	0.594766	0.295234	None	2.347786	4.471266	11.833378	phenom_richards	1.701812	0.193621
spline	7.236181	0.449496	0.453677	None	1.527844	5.252174	10.196329	spline	1.721717	0.193828
sliding_window	7.365057	0.461325	0.303536	None	2.283572	4.31411	11.703825	sliding_window	1.721717	0.193828