export interface Label { centroid: Type; data: Type[]; } export type DistanceFunc = (a: Type, b: Type) => number; export interface shuffleItem { item: Type, random: number, } /** * 从数据集中随机挑选指定数量的数据 * @param dataset * @param size * @returns */ export function randomPickup(dataset: Type[], size: number): Type[] { if (dataset.length <= size) return dataset; let shuffle: Type[] = dataset.map(item => { return { item: item, random: Math.random() } as shuffleItem; }) .sort((a: shuffleItem, b: shuffleItem) => { return a.random - b.random; }) .map((si: shuffleItem) => si.item).slice(0, size); return shuffle; } /** * 挑选数据集中心点 * @param dataset * @param distanceFunc * @returns */ export function pickupCentroid(dataset: Type[], distanceFunc: DistanceFunc): Type { let minDist = Number.MAX_SAFE_INTEGER; let minDistIndex = 0; for (var i = 0; i < dataset.length; i++) { let dist = dataset.reduce((r: number, c: Type) => { return r + distanceFunc(c, dataset[i]); }, 0); if (dist < minDist) { minDist = dist; minDistIndex = i; } } return dataset[minDistIndex]; } /** * 验证 k-means结果 * @param labels * @param distanceFunc * @param minDist * @returns */ export function validateKmeans(labels: Label[], distanceFunc: DistanceFunc, minDist): boolean { for (var i = 0; i < labels.length; i++) { let label = labels[i]; for (var j = 0; j < label.data.length; j++) { if (distanceFunc(label.centroid, label.data[j]) >= minDist) return false; } } return true; } /** * * @param dataset * @param k * @param distanceFunc * @returns */ export function kmeans(dataset: Type[], k: number, distanceFunc: DistanceFunc): Label[] { let centroids: Type[] = randomPickup(dataset, k); let loop = 0; let labels: Label[]; do { labels = centroids.map((item: Type) => { return { centroid: item, data: [] } as Label; }) //按中心点进行归类 for (var i = 0; i < dataset.length; i++) { let item: Type = dataset[i]; let nearestLabel: Label = null; let neareastDist: number = 0; let dist: number; for (var j = 0; j < labels.length; j++) { dist = distanceFunc(item, labels[j].centroid); if (nearestLabel == null || dist < neareastDist) { nearestLabel = labels[j]; neareastDist = dist; } } nearestLabel.data.push(item); } //validate Result if (validateKmeans(labels, distanceFunc, 4) || loop > 100) break; //重新计算中心点 centroids = labels.map(label => pickupCentroid(label.data, distanceFunc)) loop++; } while (true); console.log('loop:', loop); return labels; } export interface Split { mergable: Type[]; standalone: Type[]; } export function splitByMinDistance(dataset: Type[], minDist: number = 2.3, distanceFunc: DistanceFunc): Split { let standalone: Type[] = []; let mergable: Type[] = []; for (var i = 0; i < dataset.length; i++) { let nereasets = dataset.filter(item => distanceFunc(item, dataset[i]) < minDist); if (nereasets.length <= 1) standalone.push(dataset[i]); else mergable.push(dataset[i]); } return { mergable, standalone }; } export interface Neigbours { self: Type; neigbours: Type[]; } /** * Generic merge by distance. * @param dataset * @param minDist * @param distanceFunc * @returns */ export function simpleMerge(dataset: Type[], minDist: number = 2.3, distanceFunc: DistanceFunc): Neigbours[] { let data = [...dataset]; let result = []; let list: Neigbours[] = data.map((item: Type) => ({ self: item, neigbours: [] })); //找到每个颜色最近的颜色 list.forEach(nei => { nei.neigbours = data.filter(item => distanceFunc(item, nei.self) < minDist); }) do { list = list.sort((a, b) => a.neigbours.length - b.neigbours.length); //console.log(list.length); let best = list.pop(); result.push(best); list = list.filter(nei => best.neigbours.indexOf(nei.self) < 0) if (best.neigbours.length > 1) { list.forEach(nei => { nei.neigbours = nei.neigbours.filter(item => best.neigbours.indexOf(item) < 0); }) } } while (list.length > 0); return result; }