Memory issues in Bokeh applications can manifest as browser crashes, slow rendering, server instability, or gradual RAM consumption growth, especially when using Jupyter notebooks, large datasets, or complex interactive dashboards. This guide provides actionable techniques for identifying, diagnosing, and resolving memory problems using proven Python profiling tools and Bokeh-specific debugging strategies. Common symptoms include:
- Chrome/Firefox consuming 400MB+ per plot execution in Jupyter
- Bokeh server process memory growing continuously without cleanup
- “Extra unexpected referrers” errors in Bokeh logs
- Browser tabs becoming unresponsive with large datasets
## Essential Memory Profiling Tools for Bokeh Applications

### 1. tracemalloc (built-in, Python 3.4+)

Best for: line-by-line memory allocation tracking and identifying the specific code causing memory growth.
```python
import tracemalloc

import numpy as np

from bokeh.io import output_notebook
from bokeh.plotting import figure, show

# Start memory tracing before any Bokeh operations
tracemalloc.start()
output_notebook()

def create_memory_intensive_plot():
    # Generate a large dataset
    n = 100000
    x = np.random.random(n)
    y = np.random.random(n)

    # Create a Bokeh plot
    p = figure(width=800, height=600)
    p.circle(x, y, size=5, alpha=0.6)
    show(p)

# Take a snapshot before plotting
snapshot1 = tracemalloc.take_snapshot()

create_memory_intensive_plot()

# Take a snapshot after plotting
snapshot2 = tracemalloc.take_snapshot()

# Compare snapshots to identify memory growth
top_stats = snapshot2.compare_to(snapshot1, 'lineno')

print("Top 10 memory allocation differences:")
for stat in top_stats[:10]:
    print(stat)
```
### 2. memory_profiler (third-party)

Best for: function-level memory monitoring with a simple `@profile` decorator.
```python
# Install: pip install memory-profiler
from memory_profiler import profile

import pandas as pd

from bokeh.plotting import figure

@profile
def bokeh_dataframe_plot(df_size=100000):
    """Profile memory usage of Bokeh with a large DataFrame."""
    # Create a large DataFrame (the category column is trimmed so all
    # columns have exactly df_size rows)
    df = pd.DataFrame({
        'x': range(df_size),
        'y': range(df_size),
        'category': (['A', 'B', 'C'] * (df_size // 3 + 1))[:df_size],
    })

    # Create a Bokeh plot from the DataFrame; Bokeh copies the data into
    # a ColumnDataSource here, which often causes memory spikes
    p = figure(width=800, height=600)
    p.circle('x', 'y', source=df, color='blue', size=8)

    # Attempted cleanup (the plot's data source still holds a copy)
    del df
    return p

# Run with: python -m memory_profiler your_script.py
if __name__ == "__main__":
    plot = bokeh_dataframe_plot(50000)
```
### 3. Pympler (muppy) – Advanced Object Tracking

Best for: detailed heap analysis and object lifecycle tracking.
```python
# Install: pip install pympler
import gc

from pympler import muppy, summary

from bokeh.plotting import figure

def analyze_bokeh_memory_growth():
    """Analyze object growth during Bokeh plotting."""
    # Baseline memory snapshot
    print("Taking baseline memory snapshot...")
    baseline_objects = muppy.get_objects()
    baseline_summary = summary.summarize(baseline_objects)

    # Create multiple Bokeh plots (simulating repeated operations)
    plots = []
    for i in range(5):
        p = figure(width=400, height=300, title=f"Plot {i}")
        p.line(range(1000), [x**2 for x in range(1000)])
        plots.append(p)

    # Force garbage collection
    gc.collect()

    # Take a snapshot after plotting
    after_objects = muppy.get_objects()
    after_summary = summary.summarize(after_objects)

    # Compare memory usage
    print("\nMemory difference analysis:")
    diff = summary.get_diff(baseline_summary, after_summary)
    summary.print_(diff)

    return plots

# Execute the analysis
plots = analyze_bokeh_memory_growth()
```
## Bokeh-Specific Memory Diagnostics

### Jupyter Notebook Memory Accumulation
A critical issue where repeated cell execution causes browser memory to grow by 400MB+ per run, eventually crashing Chrome/Firefox.
```python
# Problem-diagnosis script for Jupyter memory leaks
import tracemalloc

import numpy as np

from bokeh.io import curdoc, output_notebook, show
from bokeh.plotting import figure

def diagnose_jupyter_memory_leak():
    """Check whether Bokeh is cleaning up properly in Jupyter."""
    tracemalloc.start()
    output_notebook()

    # Create test data
    n_points = 20000
    n_lines = 100
    print(f"Testing with {n_lines} lines, {n_points} points each")

    # Take a memory snapshot before plotting
    snapshot_before = tracemalloc.take_snapshot()

    # Create the plot (this is the problematic operation)
    plot = figure(x_axis_type="linear", width=800, height=600)
    for i in range(n_lines):
        x = np.arange(n_points)
        y = np.cumsum(np.random.randn(n_points)) + i * 100
        plot.line(x, y, line_width=1, alpha=0.8)

    show(plot)

    # Take a snapshot after plotting
    snapshot_after = tracemalloc.take_snapshot()

    # Analyze the memory difference
    top_stats = snapshot_after.compare_to(snapshot_before, 'lineno')
    print("\nMemory allocation by line:")
    for i, stat in enumerate(top_stats[:5]):
        print(f"{i + 1}. {stat}")

    # Attempt cleanup
    try:
        curdoc().clear()
        print("Document cleared")
    except Exception as exc:
        print(f"Failed to clear document: {exc}")

# Run this in a Jupyter cell and execute it multiple times
diagnose_jupyter_memory_leak()
```
### Server Memory Leak Detection
Bokeh server applications can accumulate memory through improper session cleanup and lingering object references.
```python
# Server memory-monitoring script
import threading
import time

import psutil

def monitor_server_memory(server_process_name="bokeh"):
    """Monitor Bokeh server memory usage over time."""
    print("Monitoring Bokeh server memory usage...")
    print("Process Name | PID  | Memory (MB) | Memory % | Timestamp")
    print("-" * 65)

    try:
        while True:
            attrs = ['pid', 'name', 'memory_info', 'memory_percent']
            for proc in psutil.process_iter(attrs):
                try:
                    # proc.info['name'] can be None for some processes
                    name = proc.info['name'] or ""
                    if server_process_name.lower() in name.lower():
                        memory_mb = proc.info['memory_info'].rss / (1024 * 1024)
                        memory_percent = proc.info['memory_percent']
                        timestamp = time.strftime("%H:%M:%S")
                        print(f"{name:<12} | {proc.info['pid']:<4} | "
                              f"{memory_mb:>10.1f} | {memory_percent:>8.1f}% | "
                              f"{timestamp}")

                        # Alert if memory usage exceeds 1 GB
                        if memory_mb > 1000:
                            print(f"HIGH MEMORY USAGE DETECTED: {memory_mb:.1f} MB")
                except (psutil.NoSuchProcess, psutil.AccessDenied):
                    continue
            time.sleep(10)  # Check every 10 seconds
    except KeyboardInterrupt:
        print("\nMonitoring stopped")

# Run in a separate thread while exercising your Bokeh server
memory_thread = threading.Thread(target=monitor_server_memory, daemon=True)
memory_thread.start()
```
## Advanced Memory Debugging Techniques

### Object Reference Tracking

Identify objects that should be garbage collected but aren't being released. The table below compares the main tools; a minimal `objgraph` sketch follows it.
| Tool | Use Case | Installation | Key Features |
|---|---|---|---|
| tracemalloc | Line-by-line allocation tracking | Built-in (Python 3.4+) | Stack traces, snapshots, comparisons |
| memory_profiler | Function-level profiling | `pip install memory-profiler` | `@profile` decorator, real-time monitoring |
| Pympler | Heap analysis, object tracking | `pip install pympler` | Object summaries, leak detection |
| objgraph | Object relationship visualization | `pip install objgraph` | Reference chains, growth tracking |
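As a starting point for reference tracking, `objgraph` can report which object types grew between two checkpoints; Bokeh model types that keep growing after deletion and a forced garbage collection are candidates for leaked references. A minimal sketch (assumes `pip install objgraph`; the plot contents are illustrative only):

```python
# Sketch: spot object types that survive deletion and garbage collection.
import gc

import objgraph
from bokeh.plotting import figure

objgraph.show_growth(limit=5)  # first call records a baseline of type counts

p = figure(width=400, height=300)
p.line([1, 2, 3], [1, 4, 9])
del p
gc.collect()

# Second call prints only the types whose counts grew since the baseline;
# Bokeh model classes appearing here were never released.
objgraph.show_growth(limit=5)
```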
### Production Environment Memory Monitoring
```python
# Production-ready memory monitoring for Bokeh apps
import logging
import tracemalloc
from functools import wraps

from bokeh.application import Application
from bokeh.application.handlers import FunctionHandler

def memory_monitor(func):
    """Decorator to monitor memory usage of Bokeh handlers."""
    @wraps(func)
    def wrapper(*args, **kwargs):
        # Start tracing if not already started
        if not tracemalloc.is_tracing():
            tracemalloc.start()

        # Snapshot before the handler runs
        snapshot_before = tracemalloc.take_snapshot()
        try:
            result = func(*args, **kwargs)
        finally:
            # Snapshot after execution, even if the handler raised
            snapshot_after = tracemalloc.take_snapshot()

            # Sum the allocation growth across all lines
            top_stats = snapshot_after.compare_to(snapshot_before, 'lineno')
            total_memory = sum(stat.size_diff for stat in top_stats)

            # Log significant memory growth (> 10 MB)
            if total_memory > 10 * 1024 * 1024:
                logging.warning(f"High memory usage in {func.__name__}: "
                                f"{total_memory / (1024 * 1024):.1f} MB")
                # Log the top memory consumers
                for stat in top_stats[:3]:
                    logging.info(f"Memory hotspot: {stat}")
        return result
    return wrapper

# Usage in a Bokeh application
@memory_monitor
def create_app(doc):
    """Bokeh application with memory monitoring."""
    from bokeh.plotting import figure
    p = figure()
    # ... add glyphs, widgets, etc.
    doc.add_root(p)

# Configure logging
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s - %(levelname)s - %(message)s')

# Create and serve the application
handler = FunctionHandler(create_app)
app = Application(handler)
```
## Memory Issue Resolution Strategies

### Quick Diagnosis Checklist
- Browser Memory (Jupyter): Use browser dev tools → Memory tab to track DOM/JS objects
- Python Process Memory: Use `tracemalloc` for allocation tracking
- Object Lifecycle: Use `pympler` to identify unreleased objects
- Server Memory: Monitor with `psutil` and implement cleanup hooks
### Environment-Specific Solutions

**Jupyter Notebook Memory Fixes:**
- Use `bokeh.io.reset_output()` and `curdoc().clear()` before each plot (see the snippet after this list)
- Restart the kernel periodically when working with large datasets
- Use `output_file()` instead of `output_notebook()` for memory-intensive plots
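A minimal sketch of that reset pattern, meant to run at the top of a cell before building the next plot:

```python
# Minimal reset between Jupyter cell runs: clear the implicit document
# and forget the previous output destination before plotting again.
from bokeh.io import curdoc, output_notebook, reset_output

curdoc().clear()   # drop all roots from the current document
reset_output()     # reset Bokeh's output state
output_notebook()  # re-enable inline output for the next plot
```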
**Production Server Memory Management:**
- Configure session timeouts with `--unused-session-lifetime` and `--check-unused-sessions`
- Implement custom cleanup with `curdoc().on_session_destroyed()` (see the sketch after this list)
- Use memory limits in Docker containers and monitor with external tools
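Both timeout flags are passed to `bokeh serve` in milliseconds, e.g. `--check-unused-sessions 10000 --unused-session-lifetime 60000`. For the cleanup hook, here is a sketch; the `SESSION_CACHE` dict is a hypothetical stand-in for whatever per-session state your application holds:

```python
# Sketch: release per-session state when a Bokeh server session ends.
# SESSION_CACHE is hypothetical; substitute your app's per-session state.
from bokeh.io import curdoc

SESSION_CACHE = {}  # e.g. large DataFrames keyed by session id

def release_session_state(session_context):
    # Called by the server once this session is discarded
    SESSION_CACHE.pop(session_context.id, None)

curdoc().on_session_destroyed(release_session_state)
```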
## Automated Memory Testing Framework
```python
# A memory-testing framework for Bokeh applications
import gc
import tracemalloc
import unittest

import numpy as np

from bokeh.io import output_file, save
from bokeh.plotting import figure

class BokehMemoryTestCase(unittest.TestCase):
    """Base test case for Bokeh memory testing."""

    def setUp(self):
        """Start memory tracing before each test."""
        tracemalloc.start()
        self.initial_snapshot = tracemalloc.take_snapshot()

    def tearDown(self):
        """Check for memory leaks after each test."""
        gc.collect()  # Force garbage collection
        final_snapshot = tracemalloc.take_snapshot()

        # Compare memory usage against the baseline
        top_stats = final_snapshot.compare_to(self.initial_snapshot, 'lineno')
        total_memory = sum(stat.size_diff for stat in top_stats)

        # Assert memory growth stays within an acceptable limit (50 MB)
        max_memory_mb = 50
        actual_memory_mb = total_memory / (1024 * 1024)
        self.assertLess(actual_memory_mb, max_memory_mb,
                        f"Memory usage {actual_memory_mb:.1f}MB exceeds "
                        f"limit {max_memory_mb}MB")

    def test_large_scatter_plot_memory(self):
        """Test memory usage of large scatter plots."""
        n_points = 100000
        x = np.random.random(n_points)
        y = np.random.random(n_points)

        p = figure(width=800, height=600)
        p.circle(x, y, size=2, alpha=0.5)

        output_file("test_scatter.html")
        save(p)  # save() rather than show() so tests don't open a browser

        # Cleanup
        del x, y, p

    def test_multiple_plots_cleanup(self):
        """Test that multiple plots don't accumulate memory."""
        plots = []
        for i in range(10):
            p = figure(width=400, height=300)
            p.line(range(1000), [x**2 for x in range(1000)])
            plots.append(p)

        # Explicit cleanup
        del plots

if __name__ == "__main__":
    unittest.main()
```
## Summary: Memory Profiling Best Practices

- Start with built-in tools: use `tracemalloc` for initial memory allocation analysis
- Profile incrementally: use the `@profile` decorator to isolate memory-intensive functions
- Monitor production apps: implement continuous memory monitoring with alerts
- Test memory behavior: create automated tests that verify memory usage stays within limits
- Environment-specific solutions: apply Jupyter-specific and server-specific memory management techniques
Finally, watch for these warning signs that a memory problem is developing:

- Exponential memory growth in Jupyter after repeated cell execution
- Server memory that never decreases after sessions end
- Large ColumnDataSource objects not being garbage collected
- JavaScript heap growth in browser (check browser dev tools)