feat: implemented timestamp-based LRU caching for all workbooks

- add timestamp to cache keys to properly handle deleted/replaced files
- create modular LRUCache implementation to limit memory usage (up to 20
  workbooks and 255 rendered sheet pages, shared across all workbooks)
- added unit tests for LRUCache
This commit is contained in:
Asad Karimov 2025-05-16 17:29:09 -04:00
parent 383b1d610f
commit 21f4542e5f
7 changed files with 391 additions and 127 deletions

BIN
image.png

Binary file not shown.

Before

Width:  |  Height:  |  Size: 142 KiB

@ -5,7 +5,7 @@
"author": "Asadbek Karimov <contact@asadk.dev>",
"publisher": "asadbek",
"icon": "img/logo.png",
"version": "0.0.7",
"version": "0.0.8",
"license": "Apache-2.0",
"bugs": {
"url": "https://git.sheetjs.com/asadbek064/sheetjs-vscode-extension/issues"
@ -35,99 +35,37 @@
"viewType": "excelViewer.spreadsheet",
"displayName": "SheetJS Viewer",
"selector": [
{
"filenamePattern": "*.xlsx"
},
{
"filenamePattern": "*.xlsm"
},
{
"filenamePattern": "*.xlsb"
},
{
"filenamePattern": "*.xls"
},
{
"filenamePattern": "*.xlw"
},
{
"filenamePattern": "*.xlr"
},
{
"filenamePattern": "*.csv"
},
{
"filenamePattern": "*.dif"
},
{
"filenamePattern": "*.slk"
},
{
"filenamePattern": "*.sylk"
},
{
"filenamePattern": "*.prn"
},
{
"filenamePattern": "*.numbers"
},
{
"filenamePattern": "*.et"
},
{
"filenamePattern": "*.ods"
},
{
"filenamePattern": "*.fods"
},
{
"filenamePattern": "*.uos"
},
{
"filenamePattern": "*.dbf"
},
{
"filenamePattern": "*.wk1"
},
{
"filenamePattern": "*.wk3"
},
{
"filenamePattern": "*.wks"
},
{
"filenamePattern": "*.wk2"
},
{
"filenamePattern": "*.wk4"
},
{
"filenamePattern": "*.123"
},
{
"filenamePattern": "*.wq1"
},
{
"filenamePattern": "*.wq2"
},
{
"filenamePattern": "*.wb1"
},
{
"filenamePattern": "*.wb2"
},
{
"filenamePattern": "*.wb3"
},
{
"filenamePattern": "*.qpw"
},
{
"filenamePattern": "*.xlr"
},
{
"filenamePattern": "*.eth"
}
{"filenamePattern": "*.xlsx"},
{"filenamePattern": "*.xlsm"},
{"filenamePattern": "*.xlsb"},
{"filenamePattern": "*.xls"},
{"filenamePattern": "*.xlw"},
{"filenamePattern": "*.xlr"},
{"filenamePattern": "*.csv"},
{"filenamePattern": "*.dif"},
{"filenamePattern": "*.slk"},
{"filenamePattern": "*.sylk"},
{"filenamePattern": "*.prn"},
{"filenamePattern": "*.numbers"},
{"filenamePattern": "*.et"},
{"filenamePattern": "*.ods"},
{"filenamePattern": "*.fods"},
{"filenamePattern": "*.uos"},
{"filenamePattern": "*.dbf"},
{"filenamePattern": "*.wk1"},
{"filenamePattern": "*.wk3"},
{"filenamePattern": "*.wks"},
{"filenamePattern": "*.wk2"},
{"filenamePattern": "*.wk4"},
{"filenamePattern": "*.123"},
{"filenamePattern": "*.wq1"},
{"filenamePattern": "*.wq2"},
{"filenamePattern": "*.wb1"},
{"filenamePattern": "*.wb2"},
{"filenamePattern": "*.wb3"},
{"filenamePattern": "*.qpw"},
{"filenamePattern": "*.xlr"},
{"filenamePattern": "*.eth" }
],
"priority": "default"
}

@ -0,0 +1,126 @@
/**
 * Generic least-recently-used (LRU) cache.
 *
 * Recency is tracked through the insertion order of a single `Map`: every
 * read or write moves the touched key to the end, so the first key of the
 * map is always the least recently used one. This avoids relying on
 * wall-clock timestamps, whose millisecond resolution cannot order several
 * accesses that happen within the same millisecond.
 */
export class LRUCache<K, V> {
    /** entries ordered from least recently used (first) to most recently used (last) */
    private readonly cache = new Map<K, V>();
    private readonly maxSize: number;

    /**
     * create a new LRU cache
     * @param maxSize Maximum number of items to store in the cache
     */
    constructor(maxSize: number) {
        this.maxSize = maxSize;
    }

    /**
     * get an item from the cache and mark it as most recently used
     * @param key cache key
     * @returns cached value or undefined if not found
     */
    get(key: K): V | undefined {
        if (!this.cache.has(key)) {
            return undefined;
        }
        // the key is known to be present, so the lookup yields a V
        const value = this.cache.get(key) as V;
        // re-insert so the key moves to the most-recently-used end
        this.cache.delete(key);
        this.cache.set(key, value);
        return value;
    }

    /**
     * store an item in the cache and mark it as most recently used;
     * when a new key is added to a full cache, the least recently used
     * entry is evicted first
     * @param key cache key
     * @param value value to cache
     */
    set(key: K, value: V): void {
        if (this.cache.has(key)) {
            // overwriting never evicts; drop the old slot so the key is re-appended last
            this.cache.delete(key);
        } else if (this.cache.size >= this.maxSize) {
            this.evictLeastRecentlyUsed();
        }
        this.cache.set(key, value);
    }

    /**
     * check if an item exists in the cache (does not affect recency)
     * @param key The cache key
     * @returns True if the item exists
     */
    has(key: K): boolean {
        return this.cache.has(key);
    }

    /**
     * remove an item from the cache
     * @param key key to remove
     * @returns True if an item was removed
     */
    delete(key: K): boolean {
        return this.cache.delete(key);
    }

    /**
     * get all keys in the cache
     * @returns Array of cache keys, ordered from least to most recently used
     */
    keys(): K[] {
        return [...this.cache.keys()];
    }

    /**
     * get the current size of the cache
     */
    get size(): number {
        return this.cache.size;
    }

    /**
     * clear all items from the cache
     */
    clear(): void {
        this.cache.clear();
    }

    /**
     * remove every item whose key satisfies a predicate
     * @param predicate Function that returns true for keys to remove
     */
    deleteByPredicate(predicate: (key: K) => boolean): void {
        // iterate over a snapshot of the keys so deleting while looping is safe
        for (const key of this.keys()) {
            if (predicate(key)) {
                this.delete(key);
            }
        }
    }

    /**
     * remove the least recently used item from the cache (no-op when empty)
     * @private
     */
    private evictLeastRecentlyUsed(): void {
        // the first key in iteration order is the least recently used one
        for (const oldestKey of this.cache.keys()) {
            console.log(`Evicting least recently used cache entry: ${String(oldestKey)}`);
            this.cache.delete(oldestKey);
            return;
        }
    }
}

@ -0,0 +1,104 @@
import * as vscode from 'vscode';
import * as XLSX from 'xlsx';
import { LRUCache } from './lruCache';
/**
 * specialized cache for SheetJS workbooks with URI-based keys
 *
 * Holds two independent LRU caches: one for parsed workbooks and one for
 * rendered sheet HTML. Keys produced by `generateKey` / `generateSheetKey`
 * always begin with the document URI string followed by `-`, which is what
 * `clearCachesForUri` relies on to find the entries of one document.
 */
export class WorkbookCache {
    private workbookCache: LRUCache<string, XLSX.WorkBook>;
    private sheetCache: LRUCache<string, string>;

    /**
     * create a new workbook cache
     * @param maxWorkbooks Maximum number of workbooks to cache
     * @param maxSheets Maximum number of sheet HTML entries to cache (one shared limit, not per workbook)
     */
    constructor(maxWorkbooks: number = 10, maxSheets: number = 255) {
        this.workbookCache = new LRUCache<string, XLSX.WorkBook>(maxWorkbooks);
        this.sheetCache = new LRUCache<string, string>(maxSheets);
    }

    /**
     * generate a cache key for a document
     * @param uri Document URI
     * @param mtime Modification time
     * @returns key of the form `<uri>-<mtime>`
     */
    generateKey(uri: vscode.Uri, mtime: number): string {
        return `${uri.toString()}-${mtime}`;
    }

    /**
     * get a workbook from the cache
     * @param key Cache key
     * @returns cached workbook or undefined if not found
     */
    getWorkbook(key: string): XLSX.WorkBook | undefined {
        return this.workbookCache.get(key);
    }

    /**
     * store a workbook in the cache
     * @param key Cache key
     * @param workbook Workbook to cache
     */
    setWorkbook(key: string, workbook: XLSX.WorkBook): void {
        this.workbookCache.set(key, workbook);
    }

    /**
     * check if a workbook exists in the cache
     * @param key Cache key
     */
    hasWorkbook(key: string): boolean {
        return this.workbookCache.has(key);
    }

    /**
     * get sheet HTML from the cache
     * @param key Sheet cache key
     * @returns cached HTML or undefined if not found
     */
    getSheet(key: string): string | undefined {
        return this.sheetCache.get(key);
    }

    /**
     * store sheet HTML in the cache
     * @param key Sheet cache key
     * @param html Sheet HTML
     */
    setSheet(key: string, html: string): void {
        this.sheetCache.set(key, html);
    }

    /**
     * Check if sheet HTML exists in the cache
     * @param key Sheet cache key
     */
    hasSheet(key: string): boolean {
        return this.sheetCache.has(key);
    }

    /**
     * generate a sheet cache key
     * @param baseKey Base workbook key
     * @param sheetName Sheet name
     * @param page Page number
     * @returns key of the form `<baseKey>-<sheetName>-page-<page>`
     */
    generateSheetKey(baseKey: string, sheetName: string, page: number): string {
        return `${baseKey}-${sheetName}-page-${page}`;
    }

    /**
     * clear all caches for a specific URI
     *
     * An entry is removed when its key equals `uriString` or starts with
     * `uriString` followed by the `-` delimiter used by `generateKey`.
     * Requiring the delimiter keeps a URI such as `.../report.xls` from also
     * clearing the entries of `.../report.xlsx`.
     * NOTE(review): a different document whose URI itself continues with `-`
     * (e.g. `.../report.xls-old`) still matches; this only costs a re-parse.
     * @param uriString URI string whose cache entries should be cleared
     */
    clearCachesForUri(uriString: string): void {
        console.log(`Clearing caches for ${uriString}`);
        const delimitedPrefix = `${uriString}-`;
        const belongsToUri = (key: string): boolean =>
            key === uriString || key.startsWith(delimitedPrefix);
        // clear workbook cache entries for this URI
        this.workbookCache.deleteByPredicate(belongsToUri);
        // clear sheet cache entries for this URI
        this.sheetCache.deleteByPredicate(belongsToUri);
    }
}

@ -3,12 +3,11 @@ import * as XLSX from 'xlsx';
import { ExcelDocument } from './excelDocument';
import { getLoadingViewHtml, getErrorViewHtml, getExcelViewerHtml } from './webviewContent';
import { parseRange, colLetterToNum, numToColLetter } from './excelUtils';
import { WorkbookCache } from './cacheManagement/workbookCache';
export class ExcelEditorProvider implements vscode.CustomReadonlyEditorProvider<ExcelDocument> {
// cache workbooks in memory to avoid re-parsing
private workbookCache = new Map<string, XLSX.WorkBook>();
private sheetCache = new Map<string, string>();
private cache: WorkbookCache;
public static register(context: vscode.ExtensionContext): vscode.Disposable {
return vscode.window.registerCustomEditorProvider(
'excelViewer.spreadsheet',
@ -17,7 +16,9 @@ export class ExcelEditorProvider implements vscode.CustomReadonlyEditorProvider<
);
}
constructor(private readonly context: vscode.ExtensionContext) { }
constructor(private readonly context: vscode.ExtensionContext) {
this.cache = new WorkbookCache(20, 255);
}
async openCustomDocument(uri: vscode.Uri): Promise<ExcelDocument> {
console.log(`Opening document: ${uri.fsPath}`);
@ -66,23 +67,30 @@ export class ExcelEditorProvider implements vscode.CustomReadonlyEditorProvider<
}
private async processExcelFile(document: ExcelDocument, webviewPanel: vscode.WebviewPanel): Promise<void> {
const stat = await vscode.workspace.fs.stat(document.uri);
const cacheKey = `${document.uri.toString()}-${stat.mtime}`;
// check if we have a cached workbook for this file
let workbook: XLSX.WorkBook;
const cacheKey = document.uri.toString();
if (this.workbookCache.has(cacheKey)) {
if (this.cache.hasWorkbook(cacheKey)) {
console.log('Using cached workbook');
workbook = this.workbookCache.get(cacheKey)!;
workbook = this.cache.getWorkbook(cacheKey)!;
this.updateLoadingProgress(webviewPanel, 'Using cached workbook...');
} else {
// clear old caches for this URI
this.cache.clearCachesForUri(document.uri.toString());
// load workbook
workbook = await this.loadWorkbook(document, webviewPanel);
}
// setup the initial view with just the sheet selector
const sheetNames = workbook.SheetNames;
this.setupWebviewContent(document, webviewPanel, workbook, sheetNames);
this.setupWebviewContent(document, webviewPanel, workbook, sheetNames, cacheKey);
}
private async loadWorkbook(document: ExcelDocument, webviewPanel: vscode.WebviewPanel): Promise<XLSX.WorkBook> {
this.updateLoadingProgress(webviewPanel, 'Reading file...');
@ -121,9 +129,11 @@ export class ExcelEditorProvider implements vscode.CustomReadonlyEditorProvider<
this.updateLoadingProgress(webviewPanel, 'Preparing view...');
const stat = await vscode.workspace.fs.stat(document.uri);
const cacheKey = `${document.uri.toString()}-${stat.mtime}`;
// cache the workbook
const cacheKey = document.uri.toString();
this.workbookCache.set(cacheKey, workbook);
this.cache.setWorkbook(cacheKey, workbook);
return workbook;
} catch (error) {
@ -137,7 +147,8 @@ export class ExcelEditorProvider implements vscode.CustomReadonlyEditorProvider<
document: ExcelDocument,
webviewPanel: vscode.WebviewPanel,
workbook: XLSX.WorkBook,
sheetNames: string[]
sheetNames: string[],
cacheKey: string
): void {
// exit early if there are no sheets
if (sheetNames.length === 0) {
@ -165,20 +176,21 @@ export class ExcelEditorProvider implements vscode.CustomReadonlyEditorProvider<
webviewPanel.webview.html = getExcelViewerHtml(sheetNames, sheetSelector);
// handle messages from the webview
this.setupMessageHandlers(document, webviewPanel, workbook);
this.setupMessageHandlers(document, webviewPanel, workbook, cacheKey);
}
private setupMessageHandlers(
document: ExcelDocument,
webviewPanel: vscode.WebviewPanel,
workbook: XLSX.WorkBook
workbook: XLSX.WorkBook,
cacheKey: string
): void {
const baseCacheKey = document.uri.toString();
webviewPanel.webview.onDidReceiveMessage(async message => {
if (message.type === 'getSheetPage') {
await this.handleGetSheetPage(
baseCacheKey,
cacheKey,
workbook,
webviewPanel,
message
@ -194,7 +206,7 @@ export class ExcelEditorProvider implements vscode.CustomReadonlyEditorProvider<
message: any
): Promise<void> {
const { sheetName, page, rowsPerPage, maxColumns } = message;
const cacheKey = `${baseCacheKey}-${sheetName}-page-${page}`;
const cacheKey = this.cache.generateSheetKey(baseCacheKey, sheetName, page);
this.updateLoadingProgress(webviewPanel, `Preparing page ${page + 1} of sheet: ${sheetName}`);
@ -205,8 +217,8 @@ export class ExcelEditorProvider implements vscode.CustomReadonlyEditorProvider<
let rangeInfo: any = null;
// check if this page is already cached
if (this.sheetCache.has(cacheKey)) {
sheetHtml = this.sheetCache.get(cacheKey)!;
if (this.cache.hasSheet(cacheKey)) {
sheetHtml = this.cache.getSheet(cacheKey)!;
} else {
const sheet = workbook.Sheets[sheetName];
@ -220,7 +232,7 @@ export class ExcelEditorProvider implements vscode.CustomReadonlyEditorProvider<
sheetHtml = this.processSheetPage(sheet, rangeInfo, page, rowsPerPage, maxColumns);
// cache the result
this.sheetCache.set(cacheKey, sheetHtml);
this.cache.setSheet(cacheKey, sheetHtml);
}
}
@ -264,8 +276,8 @@ export class ExcelEditorProvider implements vscode.CustomReadonlyEditorProvider<
const newPageSheet: XLSX.WorkSheet = { '!ref': pageRange };
// preserve important sheet properties
if (sheet['!cols']) newPageSheet['!cols'] = sheet['!cols'];
if (sheet['!rows']) newPageSheet['!rows'] = sheet['!rows'];
if (sheet['!cols']) { newPageSheet['!cols'] = sheet['!cols']; }
if (sheet['!rows']) { newPageSheet['!rows'] = sheet['!rows']; }
if (sheet['!merges']) {
// filter merges that are in this page's range
newPageSheet['!merges'] = sheet['!merges'].filter(merge => {
@ -296,4 +308,5 @@ export class ExcelEditorProvider implements vscode.CustomReadonlyEditorProvider<
// convert to HTML
return XLSX.utils.sheet_to_html(newPageSheet);
}
}

@ -1,6 +1,6 @@
// parse range like A1:Z100 and return information about dimensions
export function parseRange(rangeStr: string) {
if (!rangeStr || !rangeStr.includes(':')) return null;
if (!rangeStr || !rangeStr.includes(':')) { return null; }
const parts = rangeStr.split(':');
const startCell = parts[0];

@ -1,15 +1,98 @@
import * as assert from 'assert';
// You can import and use all API from the 'vscode' module
// as well as import your extension to test it
import * as vscode from 'vscode';
// import * as myExtension from '../../extension';
import * as path from 'path';
import * as fs from 'fs';
import * as os from 'os';
import * as XLSX from 'xlsx';
import { LRUCache } from '../cacheManagement/lruCache';
import { WorkbookCache } from '../cacheManagement/workbookCache';
suite('Extension Test Suite', () => {
vscode.window.showInformationMessage('Start all tests.');
test('Sample test', () => {
assert.strictEqual(-1, [1, 2, 3].indexOf(5));
assert.strictEqual(-1, [1, 2, 3].indexOf(0));
// LRUCache: basic storage, capacity enforcement and predicate-based removal
suite('LRUCache Tests', () => {
    test('Should store and retrieve values', () => {
        const lru = new LRUCache<string, number>(3);
        lru.set('a', 1);
        lru.set('b', 2);

        // stored keys return their values; a key that was never stored yields undefined
        const expectedLookups: Array<[string, number | undefined]> = [
            ['a', 1],
            ['b', 2],
            ['c', undefined],
        ];
        for (const [key, expected] of expectedLookups) {
            assert.strictEqual(lru.get(key), expected);
        }
    });

    test('Should respect maximum size', () => {
        const lru = new LRUCache<string, number>(2);
        // the third insertion exceeds the capacity of two
        lru.set('a', 1);
        lru.set('b', 2);
        lru.set('c', 3);

        // 'a' should be evicted as it's the least recently used
        const evicted = lru.get('a');
        const keptB = lru.get('b');
        const keptC = lru.get('c');
        assert.strictEqual(evicted, undefined);
        assert.strictEqual(keptB, 2);
        assert.strictEqual(keptC, 3);
    });

    test('Should delete by predicate', () => {
        const lru = new LRUCache<string, number>(5);
        lru.set('a1', 1);
        lru.set('a2', 2);
        lru.set('b1', 3);
        lru.set('b2', 4);

        // drop every key that begins with 'a'
        const startsWithA = (key: string): boolean => key.startsWith('a');
        lru.deleteByPredicate(startsWithA);

        // the 'a*' keys are gone, the 'b*' keys are untouched
        assert.strictEqual(lru.get('a1'), undefined);
        assert.strictEqual(lru.get('a2'), undefined);
        assert.strictEqual(lru.get('b1'), 3);
        assert.strictEqual(lru.get('b2'), 4);
    });
});
// WorkbookCache: key generation, workbook storage and per-URI invalidation
suite('WorkbookCache Tests', () => {
    test('Should generate correct cache keys', () => {
        const workbooks = new WorkbookCache(5, 20);
        const fileUri = vscode.Uri.file('/path/to/file.xlsx');
        const modified = 12345;

        // workbook key is "<uri>-<mtime>"
        const workbookKey = workbooks.generateKey(fileUri, modified);
        assert.strictEqual(workbookKey, `${fileUri.toString()}-${modified}`);

        // sheet key extends the workbook key with sheet name and page
        const pageKey = workbooks.generateSheetKey(workbookKey, 'Sheet1', 0);
        assert.strictEqual(pageKey, `${workbookKey}-Sheet1-page-0`);
    });

    test('Should store and retrieve workbooks', () => {
        const workbooks = new WorkbookCache(5, 20);
        const fakeWorkbook = { SheetNames: ['Sheet1'], Sheets: { Sheet1: {} } } as XLSX.WorkBook;
        const storageKey = 'test-key';

        workbooks.setWorkbook(storageKey, fakeWorkbook);

        assert.strictEqual(workbooks.hasWorkbook(storageKey), true);
        assert.deepStrictEqual(workbooks.getWorkbook(storageKey), fakeWorkbook);
    });

    test('Should clear caches for a URI', () => {
        const workbooks = new WorkbookCache(5, 20);
        const targetUri = 'file:///path/to/file.xlsx';
        const unrelatedKey = 'file:///other/file.xlsx-789';
        // each call yields a fresh, sheet-less workbook stub
        const emptyWorkbook = (): XLSX.WorkBook => ({ SheetNames: [], Sheets: {} } as XLSX.WorkBook);

        // two workbook versions and one rendered page under the target URI
        workbooks.setWorkbook(`${targetUri}-123`, emptyWorkbook());
        workbooks.setSheet(`${targetUri}-123-Sheet1-page-0`, '<table></table>');
        workbooks.setWorkbook(`${targetUri}-456`, emptyWorkbook());
        // one workbook that belongs to a different URI
        workbooks.setWorkbook(unrelatedKey, emptyWorkbook());

        workbooks.clearCachesForUri(targetUri);

        // everything under the target URI is gone; the unrelated entry survives
        assert.strictEqual(workbooks.hasWorkbook(`${targetUri}-123`), false);
        assert.strictEqual(workbooks.hasSheet(`${targetUri}-123-Sheet1-page-0`), false);
        assert.strictEqual(workbooks.hasWorkbook(`${targetUri}-456`), false);
        assert.strictEqual(workbooks.hasWorkbook(unrelatedKey), true);
    });
});