Memory issues in Bokeh applications can manifest as browser crashes, slow rendering, server instability, or steadily growing RAM consumption, especially when using Jupyter notebooks, large datasets, or complex interactive dashboards. This guide provides actionable techniques for identifying, diagnosing, and resolving memory problems using proven Python profiling tools and Bokeh-specific debugging strategies.

Common symptoms include:
- Chrome/Firefox consuming 400MB+ per plot execution in Jupyter
- Bokeh server process memory growing continuously without cleanup
- “Extra unexpected referrers” errors in Bokeh logs
- Browser tabs becoming unresponsive with large datasets
Essential Memory Profiling Tools for Bokeh Applications
1. tracemalloc (Built-in Python 3.4+)
Best for: Line-by-line memory allocation tracking and identifying specific code causing memory growth
```python
import tracemalloc
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
import numpy as np

# Start memory tracing before any Bokeh operations
tracemalloc.start()
output_notebook()

def create_memory_intensive_plot():
    # Generate a large dataset
    n = 100000
    x = np.random.random(n)
    y = np.random.random(n)

    # Create the Bokeh plot
    p = figure(width=800, height=600)
    p.circle(x, y, size=5, alpha=0.6)
    show(p)

# Take a snapshot before plotting
snapshot1 = tracemalloc.take_snapshot()

create_memory_intensive_plot()

# Take a snapshot after plotting
snapshot2 = tracemalloc.take_snapshot()

# Compare snapshots to identify memory growth
top_stats = snapshot2.compare_to(snapshot1, 'lineno')

print("Top 10 memory allocation differences:")
for stat in top_stats[:10]:
    print(stat)
```
2. memory_profiler (Third-party)
Best for: Function-level memory monitoring with a simple `@profile` decorator
```python
# Install: pip install memory-profiler
from memory_profiler import profile
from bokeh.plotting import figure, show
import pandas as pd

@profile
def bokeh_dataframe_plot(df_size=100000):
    """Profile memory usage of Bokeh with a large DataFrame."""
    # Create a large DataFrame (trim the repeated category list
    # so all columns have exactly df_size rows)
    df = pd.DataFrame({
        'x': range(df_size),
        'y': range(df_size),
        'category': (['A', 'B', 'C'] * (df_size // 3 + 1))[:df_size]
    })

    # Create a Bokeh plot from the DataFrame
    p = figure(width=800, height=600)

    # This line often causes memory spikes
    p.circle('x', 'y', source=df, color='blue', size=8)

    # Memory cleanup attempt
    del df
    return p

# Run with: python -m memory_profiler your_script.py
if __name__ == "__main__":
    plot = bokeh_dataframe_plot(50000)
```
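memory_profiler can also sample a function programmatically via its `memory_usage` helper, which is useful when you cannot launch the script under `python -m memory_profiler`. A minimal sketch reusing `bokeh_dataframe_plot` from above (the sampling interval is illustrative):

```python
from memory_profiler import memory_usage

# Sample the process RSS every 0.1 s while the function runs;
# returns a list of readings in MiB
samples = memory_usage((bokeh_dataframe_plot, (50000,)), interval=0.1)
print(f"Baseline: {samples[0]:.1f} MiB, peak: {max(samples):.1f} MiB")
```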
3. Pympler (muppy) – Advanced Object Tracking
Best for: Detailed heap analysis and object lifecycle tracking
```python
# Install: pip install pympler
from pympler import muppy, summary
from bokeh.plotting import figure
import gc

def analyze_bokeh_memory_growth():
    """Analyze object growth during Bokeh plotting."""
    # Baseline memory snapshot
    print("Taking baseline memory snapshot...")
    baseline_objects = muppy.get_objects()
    baseline_summary = summary.summarize(baseline_objects)

    # Create multiple Bokeh plots (simulating repeated operations)
    plots = []
    for i in range(5):
        p = figure(width=400, height=300, title=f"Plot {i}")
        p.line(range(1000), [x**2 for x in range(1000)])
        plots.append(p)

    # Force garbage collection
    gc.collect()

    # Take a snapshot after plotting
    after_objects = muppy.get_objects()
    after_summary = summary.summarize(after_objects)

    # Compare memory usage
    print("\nMemory difference analysis:")
    diff = summary.get_diff(baseline_summary, after_summary)
    summary.print_(diff)

    return plots

# Execute the analysis
plots = analyze_bokeh_memory_growth()
```
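For repeated checks, Pympler's `SummaryTracker` wraps this baseline-and-diff pattern in a single object: each `print_diff()` call reports only what was allocated since the previous call. A minimal sketch:

```python
from pympler import tracker
from bokeh.plotting import figure

tr = tracker.SummaryTracker()
tr.print_diff()  # first call establishes the baseline

p = figure(width=400, height=300)
p.line(range(1000), [x**2 for x in range(1000)])

tr.print_diff()  # shows only the objects created by the plotting calls
```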
Bokeh-Specific Memory Diagnostics
Jupyter Notebook Memory Accumulation
A critical issue: repeated cell execution causes browser memory to grow by 400MB+ per run, eventually crashing Chrome or Firefox.
```python
# Problem diagnosis script for Jupyter memory leaks
import tracemalloc
from bokeh.io import output_notebook, show
from bokeh.plotting import figure
import numpy as np

def diagnose_jupyter_memory_leak():
    """Identify whether Bokeh is properly cleaning up in Jupyter."""
    tracemalloc.start()
    output_notebook()

    # Create test data
    n_points = 20000
    n_lines = 100
    print(f"Testing with {n_lines} lines, {n_points} points each")

    # Take a memory snapshot before plotting
    snapshot_before = tracemalloc.take_snapshot()

    # Create the plot (this is the problematic operation)
    plot = figure(x_axis_type="linear", width=800, height=600)
    for i in range(n_lines):
        x = np.arange(n_points)
        y = np.cumsum(np.random.randn(n_points)) + i * 100
        plot.line(x, y, line_width=1, alpha=0.8)

    show(plot)

    # Take a snapshot after plotting
    snapshot_after = tracemalloc.take_snapshot()

    # Analyze the memory difference
    top_stats = snapshot_after.compare_to(snapshot_before, 'lineno')
    print("\nMemory allocation by line:")
    for i, stat in enumerate(top_stats[:5]):
        print(f"{i + 1}. {stat}")

    # Attempt cleanup
    try:
        from bokeh.io import curdoc
        curdoc().clear()
        print("Document cleared")
    except Exception:
        print("Failed to clear document")

# Run this in a Jupyter cell and execute multiple times
diagnose_jupyter_memory_leak()
```
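If the diagnosis shows growth on every execution, one mitigation is to render the plot once and push data updates into it, rather than calling `show()` again in each cell. A minimal sketch using Bokeh's notebook handle (the data values are illustrative; `push_notebook` support varies by Jupyter frontend):

```python
import numpy as np
from bokeh.io import output_notebook, push_notebook, show
from bokeh.plotting import figure

output_notebook()

p = figure(width=800, height=400)
r = p.line(np.arange(1000), np.random.randn(1000).cumsum())

# Render once and keep a handle to this output cell
handle = show(p, notebook_handle=True)

# Subsequent cells mutate the existing glyph instead of creating
# a new plot (and a new pile of browser-side objects)
r.data_source.data = {
    "x": np.arange(1000),
    "y": np.random.randn(1000).cumsum(),
}
push_notebook(handle=handle)
```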
Server Memory Leak Detection
Bokeh server applications can accumulate memory through improper session cleanup and lingering object references.
```python
# Server memory monitoring script
import psutil
import time
import threading

def monitor_server_memory(server_process_name="bokeh"):
    """Monitor Bokeh server memory usage over time."""
    print("Monitoring Bokeh server memory usage...")
    print("Process Name | PID | Memory (MB) | Memory % | Timestamp")
    print("-" * 65)

    try:
        while True:
            for proc in psutil.process_iter(['pid', 'name', 'memory_info', 'memory_percent']):
                try:
                    if server_process_name.lower() in proc.info['name'].lower():
                        memory_mb = proc.info['memory_info'].rss / (1024 * 1024)
                        memory_percent = proc.info['memory_percent']
                        timestamp = time.strftime("%H:%M:%S")
                        print(f"{proc.info['name']:<12} | {proc.info['pid']:<4} | "
                              f"{memory_mb:>10.1f} | {memory_percent:>8.1f}% | {timestamp}")

                        # Alert if memory usage is high (> 1 GB)
                        if memory_mb > 1000:
                            print(f"HIGH MEMORY USAGE DETECTED: {memory_mb:.1f} MB")
                except (psutil.NoSuchProcess, psutil.AccessDenied):
                    continue
            time.sleep(10)  # Check every 10 seconds
    except KeyboardInterrupt:
        print("\nMonitoring stopped")

# Run in a separate thread while testing your Bokeh server
memory_thread = threading.Thread(target=monitor_server_memory, daemon=True)
memory_thread.start()
```
Advanced Memory Debugging Techniques
Object Reference Tracking
Identify objects that should be garbage collected but aren’t being released.
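The objgraph package (compared in the table below) is well suited to this. A minimal sketch, assuming a plot that should have been released; note that the Bokeh class name passed to `by_type` may vary between Bokeh versions:

```python
# Install: pip install objgraph (backref graphs also need graphviz)
import gc
import objgraph
from bokeh.plotting import figure

objgraph.show_growth(limit=5)  # record a baseline

p = figure(width=400, height=300)
p.line([1, 2, 3], [1, 4, 9])
del p
gc.collect()

# Any Bokeh model classes still listed here survived deletion
objgraph.show_growth(limit=5)

# Visualize who is holding a reference to a surviving figure
leaked = objgraph.by_type('figure')  # class name is version-dependent
if leaked:
    objgraph.show_backrefs(leaked[:1], max_depth=4, filename='bokeh_refs.png')
```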
| Tool | Use Case | Installation | Key Features |
|---|---|---|---|
| `tracemalloc` | Line-by-line allocation tracking | Built-in (Python 3.4+) | Stack traces, snapshots, comparisons |
| `memory_profiler` | Function-level profiling | `pip install memory-profiler` | `@profile` decorator, real-time monitoring |
| Pympler | Heap analysis, object tracking | `pip install pympler` | Object summaries, leak detection |
| `objgraph` | Object relationship visualization | `pip install objgraph` | Reference chains, growth tracking |
Production Environment Memory Monitoring
```python
# Production-ready memory monitoring for Bokeh apps
import logging
import tracemalloc
from functools import wraps
from bokeh.application import Application
from bokeh.application.handlers import FunctionHandler

def memory_monitor(func):
    """Decorator to monitor memory usage of Bokeh handlers."""
    @wraps(func)
    def wrapper(*args, **kwargs):
        # Start tracing if not already started
        if not tracemalloc.is_tracing():
            tracemalloc.start()

        # Take a snapshot before function execution
        snapshot_before = tracemalloc.take_snapshot()
        try:
            result = func(*args, **kwargs)
        finally:
            # Take a snapshot after execution
            snapshot_after = tracemalloc.take_snapshot()

            # Calculate the net allocation during the call
            top_stats = snapshot_after.compare_to(snapshot_before, 'lineno')
            total_memory = sum(stat.size_diff for stat in top_stats)

            # Log significant memory usage (> 10 MB)
            if total_memory > 10 * 1024 * 1024:  # 10 MB threshold
                logging.warning(f"High memory usage in {func.__name__}: "
                                f"{total_memory / (1024 * 1024):.1f} MB")
                # Log the top memory consumers
                for stat in top_stats[:3]:
                    logging.info(f"Memory hotspot: {stat}")
        return result
    return wrapper

# Usage in a Bokeh application
@memory_monitor
def create_app(doc):
    """Bokeh application with memory monitoring."""
    # Your Bokeh app code here
    from bokeh.plotting import figure
    p = figure()
    # ... add glyphs, widgets, etc.
    doc.add_root(p)

# Configure logging
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s - %(levelname)s - %(message)s')

# Create and serve the application
handler = FunctionHandler(create_app)
app = Application(handler)
```
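To actually serve the monitored application, one option is to embed a Bokeh `Server` in the same process; a minimal sketch (the route and port are illustrative):

```python
from bokeh.server.server import Server

# Serve the wrapped app at http://localhost:5006/
server = Server({'/': app}, port=5006)
server.start()

if __name__ == '__main__':
    server.io_loop.add_callback(server.show, "/")
    server.io_loop.start()
```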
Memory Issue Resolution Strategies
Quick Diagnosis Checklist

- Browser Memory (Jupyter): Use the browser dev tools' Memory tab to track DOM/JS objects
- Python Process Memory: Use `tracemalloc` for allocation tracking
- Object Lifecycle: Use `pympler` to identify unreleased objects
- Server Memory: Monitor with `psutil` and implement cleanup hooks
Environment-Specific Solutions
Jupyter Notebook Memory Fixes:
- Use `bokeh.io.reset_output()` and `curdoc().clear()` before each plot (see the sketch after this list)
- Restart the kernel periodically when working with large datasets
- Use `output_file()` instead of `output_notebook()` for memory-intensive plots
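A minimal sketch of the first fix, placed at the top of any cell that is re-executed often:

```python
from bokeh.io import curdoc, output_notebook, reset_output
from bokeh.plotting import figure, show

# Drop Bokeh's cached output state and detach models left over
# from previous executions of this cell
reset_output()
curdoc().clear()
output_notebook()

p = figure(width=600, height=400)
p.line([1, 2, 3], [3, 1, 2])
show(p)
```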
Production Server Memory Management:
- Configure session timeouts with `--unused-session-lifetime` and `--check-unused-sessions`
- Implement custom cleanup with `curdoc().on_session_destroyed()` (see the sketch after this list)
- Use memory limits in Docker containers and monitor with external tools
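A minimal sketch of a session cleanup hook inside a server app, with the corresponding launch flags shown in a comment (both timeout values are illustrative and given in milliseconds):

```python
# Launch with:
#   bokeh serve app.py --unused-session-lifetime 60000 --check-unused-sessions 10000
from bokeh.io import curdoc
from bokeh.plotting import figure

doc = curdoc()

# Hypothetical per-session resource, e.g. a cached dataset
session_cache = {"data": list(range(1_000_000))}

def cleanup(session_context):
    # Runs when the server discards this session; release anything
    # that reference cycles or module globals would keep alive
    session_cache.clear()
    print(f"Cleaned up session {session_context.id}")

doc.on_session_destroyed(cleanup)

p = figure(width=600, height=400)
p.line([1, 2, 3], [1, 4, 9])
doc.add_root(p)
```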
Automated Memory Testing Framework
```python
# Complete memory testing framework for Bokeh applications
import unittest
import tracemalloc
import gc
from bokeh.plotting import figure, output_file, save

class BokehMemoryTestCase(unittest.TestCase):
    """Base test case for Bokeh memory testing."""

    def setUp(self):
        """Start memory tracing before each test."""
        tracemalloc.start()
        self.initial_snapshot = tracemalloc.take_snapshot()

    def tearDown(self):
        """Check for memory leaks after each test."""
        gc.collect()  # Force garbage collection
        final_snapshot = tracemalloc.take_snapshot()

        # Compare memory usage: sum the net allocation since setUp
        top_stats = final_snapshot.compare_to(self.initial_snapshot, 'lineno')
        total_memory = sum(stat.size_diff for stat in top_stats)

        # Assert memory usage is within acceptable limits (50 MB)
        max_memory_mb = 50
        actual_memory_mb = total_memory / (1024 * 1024)
        self.assertLess(actual_memory_mb, max_memory_mb,
                        f"Memory usage {actual_memory_mb:.1f}MB exceeds limit {max_memory_mb}MB")

    def test_large_scatter_plot_memory(self):
        """Test memory usage of large scatter plots."""
        import numpy as np

        n_points = 100000
        x = np.random.random(n_points)
        y = np.random.random(n_points)

        p = figure(width=800, height=600)
        p.circle(x, y, size=2, alpha=0.5)

        output_file("test_scatter.html")
        save(p)  # save() rather than show() so tests don't open a browser

        # Cleanup
        del x, y, p

    def test_multiple_plots_cleanup(self):
        """Test that multiple plots don't accumulate memory."""
        plots = []
        for i in range(10):
            p = figure(width=400, height=300)
            p.line(range(1000), [x**2 for x in range(1000)])
            plots.append(p)

        # Explicit cleanup
        del plots

if __name__ == "__main__":
    unittest.main()
```
Summary: Memory Profiling Best Practices
- Start with built-in tools: Use `tracemalloc` for initial memory allocation analysis
- Profile incrementally: Use the `@profile` decorator to isolate memory-intensive functions
- Monitor production apps: Implement continuous memory monitoring with alerts
- Test memory behavior: Create automated tests that verify memory usage stays within limits
- Environment-specific solutions: Apply Jupyter-specific and server-specific memory management techniques
Warning signs that point to a genuine leak rather than normal usage:

- Exponential memory growth in Jupyter after repeated cell execution
- Server memory that never decreases after sessions end
- Large ColumnDataSource objects not being garbage collected
- JavaScript heap growth in browser (check browser dev tools)
