How to Identify Memory Issues in Bokeh Applications

Memory issues in Bokeh applications can manifest as browser crashes, slow rendering, server instability, or gradual RAM consumption growth—especially when using Jupyter notebooks, large datasets, or complex interactive dashboards. This comprehensive guide provides actionable techniques for identifying, diagnosing, and resolving memory problems using proven Python profiling tools and Bokeh-specific debugging strategies.

โš ๏ธ Common Bokeh Memory Problem Symptoms:

  • Chrome/Firefox consuming 400MB+ per plot execution in Jupyter
  • Bokeh server process memory growing continuously without cleanup
  • “Extra unexpected referrers” errors in Bokeh logs
  • Browser tabs becoming unresponsive with large datasets

Essential Memory Profiling Tools for Bokeh Applications

1. tracemalloc (Built-in Python 3.4+)

Best for: Line-by-line memory allocation tracking and identifying specific code causing memory growth

import tracemalloc
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
import numpy as np

# Begin allocation tracing before any Bokeh work so the snapshots cover it
tracemalloc.start()
output_notebook()

def create_memory_intensive_plot():
    """Render a large random scatter plot to exercise memory allocation."""
    point_count = 100000
    xs = np.random.random(point_count)
    ys = np.random.random(point_count)

    fig = figure(width=800, height=600)
    fig.circle(xs, ys, size=5, alpha=0.6)
    show(fig)

# Snapshot the heap on either side of the plotting call
before = tracemalloc.take_snapshot()
create_memory_intensive_plot()
after = tracemalloc.take_snapshot()

# Rank allocation growth by the source line responsible for it
top_stats = after.compare_to(before, 'lineno')

print("๐Ÿ” Top 10 memory allocation differences:")
for stat in top_stats[:10]:
    print(f"๐Ÿ“ {stat}")
      

2. memory_profiler (Third-party)

Best for: Function-level memory monitoring with simple @profile decorator

# Install: pip install memory-profiler
from memory_profiler import profile
from bokeh.plotting import figure, show
import pandas as pd

@profile
def bokeh_dataframe_plot(df_size=100000):
    """Profile memory usage of Bokeh with a large DataFrame.

    Args:
        df_size: Number of rows in the generated DataFrame.

    Returns:
        The Bokeh figure built from the DataFrame.
    """
    # Every column must have exactly df_size entries.  The repeated
    # category list is trimmed because 3 * (df_size // 3 + 1) can exceed
    # df_size, which would raise ValueError in the DataFrame constructor.
    df = pd.DataFrame({
        'x': range(df_size),
        'y': range(df_size),
        'category': (['A', 'B', 'C'] * (df_size // 3 + 1))[:df_size]
    })

    # Create Bokeh plot from DataFrame
    p = figure(width=800, height=600)

    # This line often causes memory spikes: Bokeh copies the columns
    # into its own ColumnDataSource.
    p.circle('x', 'y', source=df, color='blue', size=8)

    # Release the original frame; the plot keeps its own copy of the data.
    del df
    return p

# Run with: python -m memory_profiler your_script.py
if __name__ == "__main__":
    plot = bokeh_dataframe_plot(50000)
      

3. Pympler (muppy) – Advanced Object Tracking

Best for: Detailed heap analysis and object lifecycle tracking

# Install: pip install pympler
from pympler import muppy, summary
from bokeh.plotting import figure, show, output_file
import gc

def analyze_bokeh_memory_growth():
    """Compare heap summaries taken before and after building several plots."""

    print("๐Ÿ“Š Taking baseline memory snapshot...")
    start_summary = summary.summarize(muppy.get_objects())

    # Build several figures to mimic repeated plotting operations
    figures = []
    for idx in range(5):
        fig = figure(width=400, height=300, title=f"Plot {idx}")
        fig.line(range(1000), [v**2 for v in range(1000)])
        figures.append(fig)

    # Drop anything unreferenced before the second snapshot
    gc.collect()

    end_summary = summary.summarize(muppy.get_objects())

    # Print which object types grew between the two snapshots
    print("\n๐Ÿ” Memory difference analysis:")
    summary.print_(summary.get_diff(start_summary, end_summary))

    return figures

# Execute analysis
plots = analyze_bokeh_memory_growth()
      

Bokeh-Specific Memory Diagnostics

Jupyter Notebook Memory Accumulation

A critical issue where repeated cell execution causes browser memory to grow by 400MB+ per run, eventually crashing Chrome/Firefox.

# Problem diagnosis script for Jupyter memory leaks
import tracemalloc
from bokeh.io import output_notebook, show, push_notebook
from bokeh.plotting import figure
import numpy as np

def diagnose_jupyter_memory_leak():
    """Identify if Bokeh is properly cleaning up in Jupyter.

    Plots many random-walk lines, then compares tracemalloc snapshots
    taken before and after plotting to show which source lines allocated
    the memory.  Run the cell several times: numbers that keep growing
    across runs indicate a leak.
    """
    tracemalloc.start()
    output_notebook()

    # Create test data
    n_points = 20000
    n_lines = 100

    print(f"🧪 Testing with {n_lines} lines, {n_points} points each")

    # Take memory snapshot before plotting
    snapshot_before = tracemalloc.take_snapshot()

    # Create plot (this is the problematic operation)
    plot = figure(x_axis_type="linear", width=800, height=600)

    # x values are identical for every line, so compute them once
    x = np.arange(n_points)
    for i in range(n_lines):
        y = np.cumsum(np.random.randn(n_points)) + i * 100
        plot.line(x, y, line_width=1, alpha=0.8)

    show(plot)

    # Take snapshot after plotting
    snapshot_after = tracemalloc.take_snapshot()

    # Rank allocations by source line to find the hotspots
    top_stats = snapshot_after.compare_to(snapshot_before, 'lineno')

    print("\n📈 Memory allocation by line:")
    for i, stat in enumerate(top_stats[:5]):
        print(f"{i+1}. {stat}")

    # Attempt cleanup.  Catch Exception only — a bare except would also
    # swallow KeyboardInterrupt/SystemExit.
    try:
        from bokeh.io import curdoc
        curdoc().clear()
        print("✅ Document cleared")
    except Exception:
        print("❌ Failed to clear document")

# Run this in a Jupyter cell and execute multiple times
diagnose_jupyter_memory_leak()
    

Server Memory Leak Detection

Bokeh server applications can accumulate memory through improper session cleanup and lingering object references.

# Server memory monitoring script
import psutil
import time
from bokeh.application import Application
from bokeh.application.handlers import FunctionHandler
from bokeh.server.server import Server
import threading

def monitor_server_memory(server_process_name="bokeh"):
    """Print memory usage of matching processes every 10 seconds.

    Args:
        server_process_name: Case-insensitive substring matched against
            process names to select the Bokeh server process(es).

    Runs until interrupted (Ctrl-C / KeyboardInterrupt).
    """
    print("🔍 Monitoring Bokeh server memory usage...")
    print("Process Name | PID | Memory (MB) | Memory % | Timestamp")
    print("-" * 65)

    try:
        while True:
            for proc in psutil.process_iter(['pid', 'name', 'memory_info', 'memory_percent']):
                try:
                    name = proc.info['name']
                    # name can be None for some system processes; guard
                    # before calling .lower() on it
                    if not name or server_process_name.lower() not in name.lower():
                        continue

                    memory_mb = proc.info['memory_info'].rss / (1024 * 1024)
                    memory_percent = proc.info['memory_percent']
                    timestamp = time.strftime("%H:%M:%S")

                    print(f"{name:<12} | {proc.info['pid']:<4} | "
                          f"{memory_mb:>10.1f} | {memory_percent:>8.1f}% | {timestamp}")

                    # Alert if memory usage is high
                    if memory_mb > 1000:  # Alert if > 1GB
                        print(f"⚠️  HIGH MEMORY USAGE DETECTED: {memory_mb:.1f} MB")

                except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
                    # Process vanished or became inaccessible mid-iteration
                    continue

            time.sleep(10)  # Check every 10 seconds

    except KeyboardInterrupt:
        print("\n🛑 Monitoring stopped")

# Run in separate thread while testing your Bokeh server
memory_thread = threading.Thread(target=monitor_server_memory, daemon=True)
memory_thread.start()
    

Advanced Memory Debugging Techniques

Object Reference Tracking

Identify objects that should be garbage collected but aren’t being released.

Tool Use Case Installation Key Features
tracemalloc Line-by-line allocation tracking Built-in (Python 3.4+) Stack traces, snapshots, comparisons
memory_profiler Function-level profiling pip install memory-profiler @profile decorator, real-time monitoring
Pympler Heap analysis, object tracking pip install pympler Object summaries, leak detection
objgraph Object relationship visualization pip install objgraph Reference chains, growth tracking

Production Environment Memory Monitoring

# Production-ready memory monitoring for Bokeh apps
import logging
import tracemalloc
from functools import wraps
from bokeh.application.handlers import FunctionHandler

def memory_monitor(func):
    """Decorator to monitor memory usage of Bokeh handlers.

    Logs a warning whenever a single call to ``func`` allocates more than
    10 MB of net growth (measured with tracemalloc), along with the top
    three allocation sites.  The wrapped function's return value and any
    exception it raises pass through unchanged.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        # Start tracing if not already started
        if not tracemalloc.is_tracing():
            tracemalloc.start()

        # Take snapshot before function execution
        snapshot_before = tracemalloc.take_snapshot()

        try:
            result = func(*args, **kwargs)
        finally:
            # Take snapshot after execution (also runs if func raised)
            snapshot_after = tracemalloc.take_snapshot()

            top_stats = snapshot_after.compare_to(snapshot_before, 'lineno')

            # Sum size_diff (net growth during this call).  stat.size is
            # each line's *total* traced memory, which would massively
            # overstate what this call allocated.
            total_growth = sum(stat.size_diff for stat in top_stats)
            if total_growth > 10 * 1024 * 1024:  # 10MB threshold
                logging.warning(f"High memory usage in {func.__name__}: "
                              f"{total_growth / (1024*1024):.1f} MB")

                # Log top memory consumers
                for stat in top_stats[:3]:
                    logging.info(f"Memory hotspot: {stat}")

        return result
    return wrapper

# Usage in Bokeh application
@memory_monitor
def create_app(doc):
    """Bokeh application handler with memory monitoring.

    Args:
        doc: The Bokeh Document for the session; a root figure is added.
    """
    # Your Bokeh app code here
    from bokeh.plotting import figure
    p = figure()
    # ... add glyphs, widgets, etc.
    doc.add_root(p)

# Configure logging
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s - %(levelname)s - %(message)s')

# Create and serve application.  Application was never imported in this
# snippet, which made the last line raise NameError.
from bokeh.application import Application

handler = FunctionHandler(create_app)
app = Application(handler)
    

Memory Issue Resolution Strategies

🎯 Quick Diagnosis Checklist

  1. Browser Memory (Jupyter): Use browser dev tools → Memory tab to track DOM/JS objects
  2. Python Process Memory: Use tracemalloc for allocation tracking
  3. Object Lifecycle: Use pympler to identify unreleased objects
  4. Server Memory: Monitor with psutil and implement cleanup hooks

Environment-Specific Solutions

Jupyter Notebook Memory Fixes:

  • Use bokeh.io.reset_output() and curdoc().clear() before each plot
  • Restart kernel periodically when working with large datasets
  • Use output_file() instead of output_notebook() for memory-intensive plots

Production Server Memory Management:

  • Configure session timeouts: --unused-session-lifetime and --check-unused-sessions
  • Implement custom cleanup with curdoc().on_session_destroyed()
  • Use memory limits in Docker containers and monitor with external tools

Automated Memory Testing Framework

# Complete memory testing framework for Bokeh applications
import unittest
import tracemalloc
import gc
from bokeh.plotting import figure, show
from bokeh.io import output_file

class BokehMemoryTestCase(unittest.TestCase):
    """Base test case for Bokeh memory testing"""
    
    def setUp(self):
        """Start memory tracing before each test"""
        tracemalloc.start()
        self.initial_snapshot = tracemalloc.take_snapshot()
    
    def tearDown(self):
        """Check for memory leaks after each test"""
        gc.collect()  # Force garbage collection
        final_snapshot = tracemalloc.take_snapshot()
        
        # Compare memory usage
        top_stats = final_snapshot.compare_to(self.initial_snapshot, 'lineno')
        total_memory = sum(stat.size for stat in top_stats)
        
        # Assert memory usage is within acceptable limits (50MB)
        max_memory_mb = 50
        actual_memory_mb = total_memory / (1024 * 1024)
        
        self.assertLess(actual_memory_mb, max_memory_mb,
                       f"Memory usage {actual_memory_mb:.1f}MB exceeds limit {max_memory_mb}MB")
    
    def test_large_scatter_plot_memory(self):
        """Test memory usage of large scatter plots"""
        import numpy as np
        
        n_points = 100000
        x = np.random.random(n_points)
        y = np.random.random(n_points)
        
        p = figure(width=800, height=600)
        p.circle(x, y, size=2, alpha=0.5)
        
        output_file("test_scatter.html")
        show(p)
        
        # Cleanup
        del x, y, p
    
    def test_multiple_plots_cleanup(self):
        """Test that multiple plots don't accumulate memory"""
        plots = []
        for i in range(10):
            p = figure(width=400, height=300)
            p.line(range(1000), [x**2 for x in range(1000)])
            plots.append(p)
        
        # Explicit cleanup
        del plots

if __name__ == "__main__":
    unittest.main()
    

Summary: Memory Profiling Best Practices

  • Start with built-in tools: Use tracemalloc for initial memory allocation analysis
  • Profile incrementally: Use @profile decorator to isolate memory-intensive functions
  • Monitor production apps: Implement continuous memory monitoring with alerts
  • Test memory behavior: Create automated tests that verify memory usage stays within limits
  • Environment-specific solutions: Apply Jupyter-specific and server-specific memory management techniques
🚨 Critical Memory Patterns to Watch:

  • Exponential memory growth in Jupyter after repeated cell execution
  • Server memory that never decreases after sessions end
  • Large ColumnDataSource objects not being garbage collected
  • JavaScript heap growth in browser (check browser dev tools)
See also: How to Scale Bokeh Applications for Production