Estimating growth rates#

Outline approach used in the app to estimate the growth rate of biological growth curves with lag phase using growthcurves package

Here we compare different packages, explain some differences between parameters using a simulated logistic growth curve with noise.

  • parametric model based fitting of noisy data

  • linear regression on non log-transformed data

  • linear regression on log-transformed data

  • spline fitting and derivatives

  • croissance exponential model fitting

  • growthcurves package fitting, comparing all methods

The \(\mu_{max}\) growth is the maximum of the second derivative or the maximum of the log transformed data slope (if estimated with a sliding window).

Setup#

Imports

Hide code cell source

from typing import Iterable, NamedTuple

import croissance
import croissance.figures.plot
import growthcurves as gc
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.optimize import curve_fit
from scipy.signal import savgol_filter
from sklearn.linear_model import LinearRegression

from piogrowth.durations import find_max_range
from piogrowth.fit_spline import (
    fit_spline_and_derivatives_one_batch,
    get_smoothing_range,
)

Hide code cell source

class GrowthParams(NamedTuple):
    a: float
    r: float
    t0: float
    shift_y: float

    def __repr__(self):
        return (
            f"GrowthParams(a={self.a:.3f}, r={self.r:.3f},"
            f" t0={self.t0:.3f}, shift_y={self.shift_y:.3f})"
        )


def generate_growth_curve(
    time_points: np.array,
    lag_duration: float = 2.0,
    growth_rate: float = 0.5,
    max_population: float = 1.0,
    shift: float = 0.0,
    noise_level: float = 0.02,
    random_seed: int = None,
    non_negative: bool = True,
    log_transform: bool = False,
    epsilon: float = 0.0001,
):
    """
    Generate biological growth curve with lag phase using modified logistic equation.

    Standard logistic: N(t) = K / (1 + ((K-N₀)/N₀) * exp(-r*t))
    With lag phase: Growth starts at effective time t_eff = t - lag_duration

    Parameters:
    -----------
    time_points : array-like
        Time points at which to evaluate the growth curve
    lag_duration : float
        Duration of the lag phase (same units as time_points)
    growth_rate : float
        Intrinsic/maximum specific growth rate r (1/time)
    max_population : float
        Carrying capacity K / maximum population size
    shift : float
        Vertical shift (e.g., for background OD)
    noise_level : float
        Standard deviation of Gaussian noise
    random_seed : int, optional
        Random seed for reproducibility
    non_negative : bool
        Whether to enforce non-negative population values
    log_transform : bool
        Whether to apply natural log transform to the output
    epsilon : float
        Small offset added before log to avoid log(0)

    Returns:
    --------
    population : numpy array
        Population values with noise at each time point
    population_clean : numpy array
        Population values without noise (for comparison)
    """
    if random_seed is not None:
        np.random.seed(random_seed)

    time_points = np.array(time_points)

    # Logistic growth equation with lag phase
    # During lag: keep population near initial with minimal growth
    # After lag: apply standard logistic starting from t_eff = 0
    adjusted_time = time_points - lag_duration
    population_clean = max_population / (1 + np.exp(-growth_rate * adjusted_time))

    # Add vertical shift (e.g., background OD reading)
    if shift > 0.0:
        population_clean = population_clean + shift

    # Add Gaussian noise
    noise = np.random.normal(
        0,
        noise_level * max_population,
        size=len(time_points),
    )
    population = population_clean + noise

    # Ensure non-negative
    if non_negative:
        population = np.maximum(population, epsilon)

    # Optional log transform (for exponential phase detection)
    if log_transform:
        population = np.log(population + epsilon)
        population_clean = np.log(population_clean + epsilon)

    return population, population_clean


def plot_simulated_growth_curve(
    time: Iterable[float],
    pop_clean,
    pop_noisy,
):
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(time, pop_clean, "b-", linewidth=2, label="Clean signal")
    ax.plot(time, pop_noisy, "r.", alpha=0.5, label="With noise")
    ax.set_xlabel("Time (hours)")
    ax.set_ylabel("Population (OD or cells/mL)")
    ax.set_title("Biological Growth Curve with Lag Phase")
    ax.legend()
    ax.grid(True, alpha=0.3)
    fig.tight_layout()
    return fig, ax


def smooth(y: np.ndarray[float], window: int = 21, poly: int = 1, passes: int = 1):
    """Smooth a series with Savitzky-Golay filtering (odd window, multi-pass)."""
    n = y.size
    if n < 7:
        return y
    w = int(window) | 1  # odd
    w = min(w, n if n % 2 else n - 1)
    p = min(int(poly), w - 1)
    for _ in range(int(passes)):
        y = savgol_filter(y, w, p, mode="interp")
    return y


def timeindex_to_hours(index: pd.DatetimeIndex):
    """
    Convert a pandas DatetimeIndex to elapsed hours since the first timestamp.

    Parameters
    ----------
    index : pd.DatetimeIndex
        The DatetimeIndex to convert.

    Returns
    -------
    numpy.ndarray
        Array of elapsed hours (float) since the first timestamp in the index,
        reshaped as a column vector.
    """
    (index - index[0]).total_seconds().to_numpy() / 3600
    return


def fit_linear_growth(s: pd.Series):
    """
    Fit a linear regression model to a time series window.

    Parameters
    ----------
    series_window : pd.Series
        Time-indexed series of population measurements.

    Returns
    -------
    linreg : LinearRegression
        Fitted linear regression model.
    """
    X = (s.index - s.index[0]).total_seconds().to_numpy().reshape(-1, 1) / 3600  # hours
    y = s.values.reshape(-1, 1)
    linreg = LinearRegression().fit(X, y)
    return linreg


def get_slope(s: pd.Series):
    linreg_fitted = fit_linear_growth(s)
    return linreg_fitted.coef_[0][0]


def get_tangent(s, max_idx, max_slope, window_hours=1):
    """
    Compute the tangent line around the maximum slope point in log2-transformed data.

    Parameters
    ----------
    s : pd.Series
        Data with datetime index.
    max_idx : pd.Timestamp
        Index of the maximum slope point.
    s_log2 : pd.DataFrame or pd.Series
        Log2-transformed smoothed data.
    max_slope : float
        Maximum slope value.
    window_hours : float, optional
        Window size in hours around max_idx to show the tangent (default: 1).

    Returns
    -------
    tangent : np.ndarray
        Array with tangent values (NaN outside the window).
    """
    x = (s.index - s.index[0]).total_seconds() / 3600  # hours
    x0 = (max_idx - s.index[0]).total_seconds() / 3600
    y0 = (
        s.loc[max_idx].values[0]
        if hasattr(s.loc[max_idx], "values")
        else s.loc[max_idx]
    )

    # Mask for ±window_hours around max_idx
    mask = (x >= x0 - window_hours) & (x <= x0 + window_hours)
    tangent = np.full_like(x, np.nan, dtype=float)
    tangent[mask] = max_slope * (x[mask] - x0) + y0
    return tangent

Model equations

  • use non-ODE formulation in comparison to the growthcurves package

Hide code cell source

def logistic_model(t, a, r, t0, shift=0.0):
    """Idealized growth curve model with lag phase."""
    u = np.exp(-r * (t - t0))
    return a / (1 + u) + shift


def logistic_model_d1(t, a, r, t0):
    """Idealized first-derivative model for growth curves."""
    u = np.exp(-r * (t - t0))
    return a * r * (u / (1 + u) ** 2)


def logistic_model_d2(t, a, r, t0):
    """Idealized second-derivative model for growth curves."""
    u = np.exp(-r * (t - t0))
    return a * r * (u * (u - 1) / (1 + u) ** 3)


def gompertz_model(t, a, r, lag):
    """Gompertz growth model: y(t) = a * exp(-exp(-r * (t - lag)))"""
    return a * np.exp(-np.exp(-r * (t - lag)))


def richards_model(t, a, r, lag, nu):
    """Richards growth model (generalized logistic with shape parameter nu)

    Generalized logistic function: https://en.wikipedia.org/wiki/Generalised_logistic_function
    """
    power = 1 / nu
    return a / ((1 + nu * np.exp(-r * (t - lag))) ** power)

S-Shaped growth curve (Logistic)#

Generate time points:

  • 2,880 would we every 30seconds for a day

  • 17,280 would be every 5seconds for a day

Hide code cell source

time_in_h = np.linspace(0, 24, 2880)
# Generate growth curve
max_population = 1.5
lag_duration = 10
growth_rate = 0.6
shift_y = 0.2
noise_level = 0.03

pop_noisy, model_curve = generate_growth_curve(
    time_points=time_in_h,
    lag_duration=lag_duration,
    growth_rate=growth_rate,
    max_population=max_population,
    shift=shift_y,
    noise_level=noise_level,
    random_seed=42,
)

Plot the model with the parameters specified above

fig, ax = plot_simulated_growth_curve(time_in_h, model_curve, pop_noisy)

Curve fitting on noisy data#

Fitted parameters with noisy data

params_fitted, pcov = curve_fit(
    logistic_model, time_in_h, pop_noisy, p0=[1.0, 0.5, 3.0, 0.0]
)
GrowthParams(*params_fitted)
GrowthParams(a=1.502, r=0.598, t0=9.978, shift_y=0.199)

Fitted parameters with smoothed data (removing some noise)

  • difference is here minimal with low and homoscedastic noise

params_fitted, pcov = curve_fit(
    logistic_model, time_in_h, smooth(pop_noisy, window=31), p0=[1.0, 0.5, 3.0, 0.0]
)
GrowthParams(*params_fitted)
GrowthParams(a=1.502, r=0.598, t0=9.978, shift_y=0.199)

Compare different growth models#

Fitting the noisy data from a logistic model with three models

Hide code cell source

# to move to pkg

model_functions = {
    "Logistic": logistic_model,
    "Gompertz": gompertz_model,
    "Richards": richards_model,
}

inital_parameters = {
    "Logistic": [1.0, 0.5, 3.0, 0.0],
    "Gompertz": [1.0, 0.5, 3.0],
    "Richards": [1.0, 0.5, 3.0, 2.0],
}

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(time_in_h, model_curve, alpha=1, label="model (w./o. noise)")

for model_name, model_func in model_functions.items():
    p0 = inital_parameters[model_name]
    params_fitted, pcov = curve_fit(model_func, time_in_h, pop_noisy, p0=p0)
    model_fit = model_func(time_in_h, *params_fitted)
    ax.plot(time_in_h, model_fit, label=f"Fitted: {model_name}", alpha=0.3)
    msg_params_fitted = ", ".join(f"{_p:.3f}" for _p in params_fitted)
    print(f"{model_name} fitted parameters: {msg_params_fitted}")
_ = ax.legend()
Logistic fitted parameters: 1.502, 0.598, 9.978, 0.199
Gompertz fitted parameters: 1.792, 0.273, 7.936
Richards fitted parameters: 1.689, 0.943, 11.015, 4.100
/tmp/ipykernel_768/2318378229.py:30: RuntimeWarning: invalid value encountered in power
  return a / ((1 + nu * np.exp(-r * (t - lag))) ** power)

A huge condition number would indicate a problem with parameter identifiability

  • huge >>> 1e12

np.linalg.cond(pcov)
np.float64(8794.927476730772)

Prepare data for PioReactor-like analysis#

  • create datetime index

  • apply rolling median smoothing (could be Savitzky-Golay filtering)

Convert hours (range of 24h) to timedelta and move to start data

start_datetime = pd.Timestamp("2025-11-21 08:00")
time_series = pd.Series(pd.to_timedelta(time_in_h, unit="h")) + start_datetime
time_series = pd.Series(time_series, dtype="datetime64[s]")
time_series
0      2025-11-21 08:00:00
1      2025-11-21 08:00:30
2      2025-11-21 08:01:00
3      2025-11-21 08:01:30
4      2025-11-21 08:02:00
               ...        
2875   2025-11-22 07:57:59
2876   2025-11-22 07:58:29
2877   2025-11-22 07:58:59
2878   2025-11-22 07:59:29
2879   2025-11-22 08:00:00
Length: 2880, dtype: datetime64[s]
# View data in format imported from PioReactor software output
# - use the rounded timestamp as index which is created as a preprocessing step
df = pd.DataFrame(
    {
        "timestamp_rounded": time_series,
        "Reactor": pop_noisy,
    }
).set_index("timestamp_rounded")
df
Reactor
timestamp_rounded
2025-11-21 08:00:00 0.226061
2025-11-21 08:00:30 0.197506
2025-11-21 08:01:00 0.232892
2025-11-21 08:01:30 0.272301
2025-11-21 08:02:00 0.193247
... ...
2025-11-22 07:57:59 1.658720
2025-11-22 07:58:29 1.657515
2025-11-22 07:58:59 1.669609
2025-11-22 07:59:29 1.712810
2025-11-22 08:00:00 1.691233

2880 rows × 1 columns

Apply rolling median to smooth the data

  • 31 consecutives timepoints are used, setting the 16th value as the smoothed value

rolling_window = 31  # in number of samples
min_periods = 15
df_rolling = df.rolling(
    rolling_window,
    min_periods=min_periods,
    center=True,
).median()
df_rolling
Reactor
timestamp_rounded
2025-11-21 08:00:00 0.195386
2025-11-21 08:00:30 0.193266
2025-11-21 08:01:00 0.195386
2025-11-21 08:01:30 0.193266
2025-11-21 08:02:00 0.193257
... ...
2025-11-22 07:57:59 1.680756
2025-11-22 07:58:29 1.685787
2025-11-22 07:58:59 1.679566
2025-11-22 07:59:29 1.685787
2025-11-22 08:00:00 1.679566

2880 rows × 1 columns

Plot median smoothed data against Savitzky-Golay smoothed data

  • what happens in the case of outliers? (to be tested)

  • are results comparable?

For now we will continue with the rolling median smoothed data

ax = df_rolling.plot(figsize=(7.4, 4), legend=False)
_ = ax.legend(["Smoothed data: rolling median"])
pd.Series(smooth(df["Reactor"], window=31, passes=1), index=df.index).plot(ax=ax)
_ = ax.legend(["Smoothed data: rolling median", "Smoothed data: Savitzky-Golay"])

Fit a linear model on the data#

  • rolling window

  • maximum overall growth: maximum of first derivative

  • \(\mu_{max}\) growth is the maximum of the second derivative, or the maximum of the log transformed data slope (estimated with a sliding window)

series_window = df_rolling.squeeze().loc["2025-11-21 15:00":"2025-11-21 22:00"]

(Non-) transformed data#

Hide code cell source

s_log2 = np.log2(df_rolling.squeeze())
s_normal = df_rolling.squeeze()
slopes_log2 = s_log2.rolling(
    rolling_window + 100,
    min_periods=min_periods,
    center=True,
).apply(get_slope)

# Find max slope and its location
max_idx = slopes_log2.idxmax()
max_slope = slopes_log2.max()

tangent = get_tangent(s_log2, max_idx=max_idx, max_slope=max_slope)

# Plot original data and tangent at max (only in window)
fig, ax = plt.subplots(figsize=(8, 5))
s_normal.plot(ax=ax, label="Original data (rolling median)")
s_log2.plot(ax=ax, label="Log2 transformed data")
ax.plot(df_rolling.index, tangent, "--", label="Tangent at max slope (±1h) (log2)")
ax.scatter([max_idx], [s_log2.loc[max_idx]], color="red", zorder=3)
ax.legend()
ax.set_title("Original data with tangent at maximum slope")
plt.show()
/tmp/ipykernel_768/3664703078.py:19: UserWarning: This axis already has a converter set and is updating to a potentially incompatible converter
  ax.plot(df_rolling.index, tangent, "--", label="Tangent at max slope (±1h) (log2)")

maximum timepoint estimation with sliding window is not perfect. small errors can lead to deviations.

print(f"shift in x of logistic funtion: {lag_duration}")
max_idx - df_rolling.index[0]
shift in x of logistic funtion: 10
Timedelta('0 days 07:09:38')
center_time = df_rolling.index[0] + pd.Timedelta(hours=lag_duration)
print(f"center of logistic curves should be: {center_time}")
slopes_log2.squeeze().nlargest(5)
center of logistic curves should be: 2025-11-21 18:00:00
timestamp_rounded
2025-11-21 15:09:38    0.462154
2025-11-21 15:09:08    0.461847
2025-11-21 15:10:08    0.461843
2025-11-21 15:10:38    0.461357
2025-11-21 15:08:38    0.461355
Name: Reactor, dtype: float64
slopes_normal = s_normal.rolling(
    rolling_window + 100,
    min_periods=min_periods,
    center=True,
).apply(get_slope)
slopes_normal.nlargest(5)
timestamp_rounded
2025-11-21 18:21:42    0.233420
2025-11-21 18:21:12    0.233351
2025-11-21 18:22:12    0.233351
2025-11-21 18:20:42    0.233131
2025-11-21 18:22:42    0.233128
Name: Reactor, dtype: float64

Hide code cell source

max_idx = slopes_normal.idxmax()
max_slope = slopes_normal.max()

tangent = get_tangent(s_normal, max_idx=max_idx, max_slope=max_slope)
ax.plot(s_normal.index, tangent, "--", label="Tangent at max slope (±1h)")
ax.scatter(
    [max_idx],
    [s_normal.loc[max_idx]],
    color="red",
    zorder=4,
)
ax.legend()
fig
<Figure size 800x500 with 1 Axes>

So, using the original (non-transformed) data, one can recover \(r\) from the slope if the carrying capacity \(K\) and the current value \(N(t)\) (or at least that the point is at the inflection, \(N = K/2\)) are known.

Hide code cell source

print(f"Growth rate in model:  {growth_rate:.3f}")
print(f"Non-transformed slope:  {slopes_normal.max():.3f}")
K_half = s_normal.loc[slopes_normal.idxmax()] - s_normal.nsmallest(10).median()
print(f"Value of curve at max slope: {K_half:.3f}")
print(
    f"Non-transformed est. r with est. K:  {slopes_normal.max() * 4 / (2 * K_half):.3f}"
)
print(
    "Non-transformed est. r with true K:  "
    f"{slopes_normal.max() * 4 / (max_population):.3f}"
)
print(f"Log2-transformed slope: {slopes_log2.max():.3f}")
print(f"Log2-transformed est. r: {slopes_log2.max() *2 * np.log(2):.3f}")
Growth rate in model:  0.600
Non-transformed slope:  0.233
Value of curve at max slope: 0.844
Non-transformed est. r with est. K:  0.553
Non-transformed est. r with true K:  0.622
Log2-transformed slope: 0.462
Log2-transformed est. r: 0.641

Hide code cell source

print("Median slope around center point (plus-minus 10mins)")
_v = slopes_normal.squeeze().loc["2025-11-21 17:50":"2025-11-21 18:10"].median()
print(f"non-transformed slope at center: {_v:.3f}")
_v = slopes_log2.squeeze().loc["2025-11-21 17:50":"2025-11-21 18:10"].median()
print(f"log-transformed slope at center: {_v:.3f}")
print(f"Log2-transformed est. r: {_v * 2 * np.log(2):.3f}")
Median slope around center point (plus-minus 10mins)
non-transformed slope at center: 0.219
log-transformed slope at center: 0.328
Log2-transformed est. r: 0.455

Use spline to find the maxium growth#

  • just to highlight this approach

Fit spline to the smoothed data and calculate derivatives

  • use smoothing factor based on data length

spline_smoothing_value = get_smoothing_range(df_rolling.shape[0])
spline_smoothing_value
SmoothingRange(s_min=2804, s=2880, s_max=2955)

Identify time points where growth rate is in the top 90%

  • could be used to estimat the growth rate during maximum growth phase

Hide code cell source

from pandas.api.types import is_datetime64_any_dtype
from scipy.interpolate import make_splrep, splev


def fit_spline_and_derivatives(
    s: pd.Series,
    smoothing_factor: float = 1000.0,
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Fit B-splines to each column in the DataFrame and compute specified derivatives.
    Values cannot be missing as NaNs, i.e. on rolling median of data.

    Parameters
    ----------
    s: pd.Series
        Input Series with time series data
    smoothing_factor: float
        Smoothing factor for the spline fitting.
    Returns:
        dict[str, pd.DataFrame]: Dictionary containing the fitted spline
                                 and its derivatives.
    """
    # drop NaN values
    s = s.dropna()

    if len(s) < 4:
        raise ValueError(
            "Not enough data points to fit a spline. Need at least 4 non-NaN values."
        )
    if not is_datetime64_any_dtype(s.index.dtype):
        raise TypeError("Index of the input Series must be datetime type.")
    x = (s.index - s.index[0]).total_seconds().to_numpy() / 3_600  # convert to hours

    bspl = make_splrep(
        x,
        s,
        s=smoothing_factor,
        k=3,
    )
    s_fitted = pd.Series(
        splev(x, bspl),
        index=s.index,
    )

    # for order in derivative_ord_ers:
    der = bspl.derivative(nu=1)
    s_first_derivative = pd.Series(
        der(x),
        index=s.index,
    )

    der2 = bspl.derivative(nu=2)
    s_second_derivative = pd.Series(
        der2(x),
        index=s.index,
    )

    return s_fitted, s_first_derivative, s_second_derivative


def plot_fitted_data(
    splines: pd.Series,
    der1: pd.Series,
    der2: pd.Series,
):
    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 12))
    splines.plot(ax=ax, title="Fitted Spline", color="blue")
    ax2 = ax.twinx()
    der1.plot(ax=ax2, color="green", label="First Derivative")
    der2.plot(ax=ax2, color="orange", label="Second Derivative")
    # ax2.set_ylabel("Derivatives")
    # der1.plot(ax=ax2, title="First Derivative", color="green")
    # der2.plot(ax=ax2, title="Second Derivative", color="orange")
    ax.legend(["Fitted Spline"])
    ax2.legend(["First Derivative", "Second Derivative"])
    fig.tight_layout()
    return fig, ax


splines, der1, der2 = fit_spline_and_derivatives(
    df_rolling.squeeze(), smoothing_factor=5.0
)
fig, ax = plot_fitted_data(splines, der1, der2)
inflection_points = df_rolling.iloc[[der2.argmax(), der2.argmin()]]
max_min_growth_change = [der2.argmax(), der2.argmin()]
der1.iloc[max_min_growth_change]
timestamp_rounded
2025-11-21 14:00:07    0.113465
2025-11-21 20:00:15    0.139471
dtype: float64
inflection_points.index - df_rolling.index.min()
TimedeltaIndex(['0 days 06:00:07', '0 days 12:00:15'], dtype='timedelta64[s]', name='timestamp_rounded', freq=None)

Hide code cell source

high_percentage_treshold = 95
splines, derivatives = fit_spline_and_derivatives_one_batch(
    df_rolling,
    smoothing_factor=spline_smoothing_value.s,
)
prop_high = high_percentage_treshold / 100
cutoffs = derivatives.max() * prop_high
in_high_growth = derivatives.ge(cutoffs, axis=1)
max_time_range = in_high_growth.apply(find_max_range, axis=0).T.convert_dtypes()
derivatives.describe()
Reactor
count 2880.000000
mean 0.053146
std 0.080244
min -0.169281
25% 0.002053
50% 0.077216
75% 0.122327
max 0.137387
t_to_max_in_h = (derivatives.idxmax() - derivatives.index.min()).dt.seconds / 3_600
t_to_max_in_h
Reactor    10.453611
dtype: float64
  • lag-phase duration should be estimated (maybe from first derivative plot?)

  • max population can be measured using OD

  • min population can be measured using OD

use od_max from df_rolling

Hide code cell source

t_max_corrected = t_to_max_in_h - lag_duration
od_max = df_rolling.loc[derivatives.idxmax()].squeeze()
max_population / (
    1
    # scaling term is interesting
    + np.exp(-growth_rate * (t_max_corrected))
)
Reactor    0.851437
dtype: float64

Find time range of maximum derivative and plot the region

def add_region_high_growth(ax, time_range, use_elapsed_time=False):
    """Add shaded region to indicate high growth phase."""
    start, end = time_range["start"], time_range["end"]
    if pd.isna(start) or pd.isna(end):
        return
    if use_elapsed_time:
        start = (start - df_rolling.index[0]).total_seconds() / 3600
        end = (end - df_rolling.index[0]).total_seconds() / 3600
    ax.axvspan(
        start,
        end,
        color="gray",
        alpha=0.3,
        label="High growth phase",
    )
    return ax


in_high_growth = derivatives.ge(cutoffs, axis=1)
max_range = find_max_range(in_high_growth["Reactor"])
fig, ax = plot_simulated_growth_curve(time_in_h, model_curve, pop_noisy)
_ = add_region_high_growth(ax, max_range, use_elapsed_time=True)

Without lag phase#

Now calculate exponential growth in the maximum growth area:

view = df_rolling.loc[max_range.start : max_range.end]
np.log(view).plot()
<Axes: xlabel='timestamp_rounded'>

Hide code cell source

od_log = np.log(view["Reactor"])
X = (od_log.index - od_log.index[0]).total_seconds().values.reshape(
    -1, 1
) / 3600  # hours
y = od_log.values.reshape(-1, 1)

reg = LinearRegression().fit(X, y)
slope = reg.coef_[0][0]
intercept = reg.intercept_[0]

print(f"Growth rate (slope): {slope:.4f} per hour")
print(f"Intercept: {intercept:.4f}")

# Plot regression line
fig, ax = plt.subplots(figsize=(7, 4))
ax.plot(X, y, "o", label="Log(OD)")
ax.plot(X, reg.predict(X), "r-", label="Linear fit")
ax.set_xlabel("Time (hours)")
ax.set_ylabel("Log(OD)")
ax.legend()
Growth rate (slope): 0.2055 per hour
Intercept: -0.4008
<matplotlib.legend.Legend at 0x70ca13978410>

Croissance estimate#

Using croissance package to estimate growth parameters from biosustain: croissance

s_normal_in_h = (
    pd.Series(pop_noisy, time_in_h).rolling(31, min_periods=15, center=True).median()
)
s_normal_in_h
0.000000     0.195386
0.008336     0.193266
0.016672     0.195386
0.025009     0.193266
0.033345     0.193257
               ...   
23.966655    1.680756
23.974991    1.685787
23.983328    1.679566
23.991664    1.685787
24.000000    1.679566
Length: 2880, dtype: float64
ret = croissance.process_curve(s_normal_in_h)
gp = ret.growth_phases[0]
gp.slope, gp.intercept
/home/docs/checkouts/readthedocs.org/user_builds/piogrowth/envs/stable/lib/python3.13/site-packages/croissance/estimation/smoothing/segments.py:20: FutureWarning: The behavior of obj[i:j] with a float-dtype index is deprecated. In a future version, this will be treated as positional instead of label-based. For label-based slicing, use obj.loc[i:j] instead
  window = series[i : i + size * increment]
/home/docs/checkouts/readthedocs.org/user_builds/piogrowth/envs/stable/lib/python3.13/site-packages/croissance/estimation/smoothing/segments.py:61: FutureWarning: The behavior of obj[i:j] with a float-dtype index is deprecated. In a future version, this will be treated as positional instead of label-based. For label-based slicing, use obj.loc[i:j] instead
  window = series[start:end]
(np.float64(0.38623235823792945), np.float64(10.485852595456521))
fig, axes = croissance.figures.plot.plot_processed_curve(ret, yscale="both")

Growthcurves package#

Use the [growthcurves](https://pypi.org/project/growthcurves/ package to fit the logistic model and estimate parameters

_ = s_normal_in_h.plot(
    figsize=(8, 5), title="Smoothed growth curve for growthcurves pkg"
)

Hide code cell source

fit_stats, growth_stats = gc.inference.compare_methods(
    t=s_normal_in_h.index.values,
    N=s_normal_in_h.values,
    model_family="all",
    # spline="fast",  # ! needs updating in growthcurves package
    window_points=150,
)
growth_stats = pd.DataFrame(growth_stats).T
growth_stats
max_od N0 mu_max intrinsic_growth_rate doubling_time exp_phase_start exp_phase_end time_at_umax od_at_umax fit_t_min fit_t_max fit_method model_rmse model_name
mech_logistic 1.700895 0.19915 0.29292 0.596981 2.366335 2.39456 13.977475 8.176353 0.580439 0.0 24.0 model_fitting_mech_logistic 0.009675 mech_logistic
mech_gompertz 1.803414 0.158867 0.321205 0.273271 2.157957 2.128178 14.497329 5.723447 0.346614 0.0 24.0 model_fitting_mech_gompertz 0.067032 mech_gompertz
mech_richards 1.701699 0.192303 0.285704 0.54994 2.426102 1.996298 14.03032 8.176353 0.583992 0.0 24.0 model_fitting_mech_richards 0.01031 mech_richards
mech_baranyi 1.700947 0.192342 0.292886 0.596198 2.366612 0.995168 13.97873 8.176353 0.580576 0.0 24.0 model_fitting_mech_baranyi 0.00966 mech_baranyi
phenom_logistic 1.703093 0.200115 0.299711 None 2.312721 4.635667 11.780248 8.224449 0.586686 0.0 24.0 model_fitting_phenom_logistic 0.010118 phenom_logistic
phenom_gompertz 1.71383 0.229502 0.345445 None 2.006532 5.458385 11.278627 7.599198 0.480797 0.0 24.0 model_fitting_phenom_gompertz 0.019375 phenom_gompertz
phenom_gompertz_modified 1.706141 0.22735 0.334868 None 2.06991 5.171202 11.320117 7.503006 0.475212 0.0 24.0 model_fitting_phenom_gompertz_modified 0.016237 phenom_gompertz_modified
phenom_richards 1.701812 0.193621 0.295234 None 2.347786 4.471266 11.833378 8.272545 0.594766 0.0 24.0 model_fitting_phenom_richards 0.009952 phenom_richards
spline 1.721717 0.193828 0.453677 None 1.527844 5.252174 10.196329 7.236181 0.449496 0.0 24.0 model_fitting_spline 0.012405 spline
sliding_window 1.721717 0.193828 0.303536 None 2.283572 4.31411 11.703825 7.365057 0.461325 6.744008 7.986106 model_fitting_sliding_window 0.022418 sliding_window
growth_stats[
    [
        "time_at_umax",
        "od_at_umax",
        "mu_max",
        "intrinsic_growth_rate",
        "doubling_time",
        "exp_phase_start",
        "exp_phase_end",
        "model_name",
        "max_od",
        "N0",
    ]
]
time_at_umax od_at_umax mu_max intrinsic_growth_rate doubling_time exp_phase_start exp_phase_end model_name max_od N0
mech_logistic 8.176353 0.580439 0.29292 0.596981 2.366335 2.39456 13.977475 mech_logistic 1.700895 0.19915
mech_gompertz 5.723447 0.346614 0.321205 0.273271 2.157957 2.128178 14.497329 mech_gompertz 1.803414 0.158867
mech_richards 8.176353 0.583992 0.285704 0.54994 2.426102 1.996298 14.03032 mech_richards 1.701699 0.192303
mech_baranyi 8.176353 0.580576 0.292886 0.596198 2.366612 0.995168 13.97873 mech_baranyi 1.700947 0.192342
phenom_logistic 8.224449 0.586686 0.299711 None 2.312721 4.635667 11.780248 phenom_logistic 1.703093 0.200115
phenom_gompertz 7.599198 0.480797 0.345445 None 2.006532 5.458385 11.278627 phenom_gompertz 1.71383 0.229502
phenom_gompertz_modified 7.503006 0.475212 0.334868 None 2.06991 5.171202 11.320117 phenom_gompertz_modified 1.706141 0.22735
phenom_richards 8.272545 0.594766 0.295234 None 2.347786 4.471266 11.833378 phenom_richards 1.701812 0.193621
spline 7.236181 0.449496 0.453677 None 1.527844 5.252174 10.196329 spline 1.721717 0.193828
sliding_window 7.365057 0.461325 0.303536 None 2.283572 4.31411 11.703825 sliding_window 1.721717 0.193828