My Project
kmeans.hh
Go to the documentation of this file.
1/* -*- mia-c++ -*-
2 *
3 * This file is part of MIA - a toolbox for medical image analysis
4 * Copyright (c) Leipzig, Madrid 1999-2017 Gert Wollny
5 *
6 * MIA is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with MIA; if not, see <http://www.gnu.org/licenses/>.
18 *
19 */
20
21#ifndef __mia_core_kmeans_hh
22#define __mia_core_kmeans_hh
23#include <vector>
24#include <numeric>
25#include <cmath>
26#include <stdexcept>
27#include <iomanip>
28#include <limits>
29#include <mia/core/defines.hh>
31#include <mia/core/msgstream.hh>
32#include <boost/concept/requires.hpp>
33#include <boost/concept_check.hpp>
34
36
37int EXPORT_CORE kmeans_get_closest_clustercenter(const std::vector<double>& classes, size_t l, double value);
38
39
40template <typename InputIterator, typename OutputIterator>
41bool kmeans_step(InputIterator ibegin, InputIterator iend, OutputIterator obegin,
42 std::vector<double>& classes, size_t l, int& biggest_class )
43{
44 cvdebug() << "kmeans enter: ";
45
46 for (size_t i = 0; i <= l; ++i )
47 cverb << std::setw(8) << classes[i] << " ";
48
49 cverb << "\n";
50 biggest_class = -1;
51 const double convLimit = 0.005; // currently fixed
52 std::vector<double> sums(classes.size());
53 std::vector<size_t> count(classes.size());
54 bool conv = false;
55 int iter = 50;
56
57 while ( iter-- && !conv) {
58 sort(classes.begin(), classes.end());
59 // assign closest cluster center
60 OutputIterator ob = obegin;
61
62 for (InputIterator b = ibegin; b != iend; ++b, ++ob) {
63 *ob = kmeans_get_closest_clustercenter(classes, l, *b);
64 ++count[*ob];
65 sums[*ob] += *b;
66 };
67
68 // recompute cluster centers
69 conv = true;
70
71 size_t max_count = 0;
72
73 for (size_t i = 0; i <= l; i++) {
74 if (count[i]) {
75 double a = sums[i] / count[i];
76
77 if (a && fabs ((a - classes[i]) / a) > convLimit)
78 conv = false;
79
80 classes[i] = a;
81
82 if (max_count < count[i]) {
83 max_count = count[i];
84 biggest_class = i;
85 }
86 } else { // if a class is empty move it closer to neighbour
87 if (i == 0)
88 classes[i] = (classes[i] + classes[i + 1]) / 2.0;
89 else
90 classes[i] = (classes[i] + classes[i - 1]) / 2.0;
91
92 conv = false;
93 }
94
95 sums[i] = 0;
96 count[i] = 0;
97 };
98 };
99
100 cvinfo() << "kmeans: " << l + 1 << " classes, " << 50 - iter << " iterations";
101
102 for (size_t i = 0; i <= l; ++i )
103 cverb << std::setw(8) << classes[i] << " ";
104
105 cverb << "\n";
106 return conv;
107}
108
109
110template <typename InputIterator, typename OutputIterator>
111bool kmeans_step_with_fixed_centers(InputIterator ibegin, InputIterator iend, OutputIterator obegin,
112 std::vector<double>& classes, const std::vector<bool>& fixed_center,
113 size_t l, int& biggest_class )
114{
115 cvdebug() << "kmeans enter: ";
116
117 for (size_t i = 0; i <= l; ++i )
118 cverb << std::setw(8) << classes[i] << " ";
119
120 cverb << "\n";
121 biggest_class = -1;
122 const double convLimit = 0.005; // currently fixed
123 std::vector<double> sums(classes.size());
124 std::vector<size_t> count(classes.size());
125 bool conv = false;
126 int iter = 50;
127
128 while ( iter-- && !conv) {
129 sort(classes.begin(), classes.end());
130 // assign closest cluster center
131 OutputIterator ob = obegin;
132
133 for (InputIterator b = ibegin; b != iend; ++b, ++ob) {
134 *ob = kmeans_get_closest_clustercenter(classes, l, *b);
135 ++count[*ob];
136 sums[*ob] += *b;
137 };
138
139 // recompute cluster centers
140 conv = true;
141
142 size_t max_count = 0;
143
144 for (size_t i = 0; i <= l; i++) {
145 if (fixed_center[i])
146 continue;
147
148 if (count[i]) {
149 double a = sums[i] / count[i];
150
151 if (a && fabs ((a - classes[i]) / a) > convLimit)
152 conv = false;
153
154 classes[i] = a;
155
156 if (max_count < count[i]) {
157 max_count = count[i];
158 biggest_class = i;
159 }
160 } else { // if a class is empty move it closer to neighbour
161 if (i == 0)
162 classes[i] = (classes[i] + classes[i + 1]) / 2.0;
163 else
164 classes[i] = (classes[i] + classes[i - 1]) / 2.0;
165
166 conv = false;
167 }
168
169 sums[i] = 0;
170 count[i] = 0;
171 };
172 };
173
174 cvinfo() << "kmeans: " << l + 1 << " classes, " << 50 - iter << " iterations";
175
176 for (size_t i = 0; i <= l; ++i )
177 cverb << std::setw(8) << classes[i] << " ";
178
179 cverb << "\n";
180 return conv;
181}
182
183
200template <typename InputIterator, typename OutputIterator>
201BOOST_CONCEPT_REQUIRES( ((::boost::ForwardIterator<InputIterator>))
202 ((::boost::Mutable_ForwardIterator<OutputIterator>)),
203 (void)
204 )
205kmeans(InputIterator ibegin, InputIterator iend, OutputIterator obegin,
206 std::vector<double>& classes)
207{
208 if (classes.size() < 2)
209 throw create_exception<std::invalid_argument>("kmeans: requested ", classes.size(),
210 "class(es), required are at least two");
211
212 const size_t nclusters = classes.size();
213 const double size = std::distance(ibegin, iend);
214
215 if ( size < nclusters )
216 throw create_exception<std::invalid_argument>("kmeans: insufficient input: want ", nclusters,
217 " classes, but git only ", size, " input elements");
218
219 double sum = std::accumulate(ibegin, iend, 0.0);
220 // simple initialization splitting at the mean
221 classes[0] = sum / (size - 1);
222 classes[1] = sum / (size + 1);
223 // first run calles directly
224 int biggest_class = 0;
225 // coverity is completely off here, the 1UL is actually a class index
226 // and has nothing to do with the size of the type pointed to by ibegin
227 //
228 // coverity[sizeof_mismatch]
229 kmeans_step(ibegin, iend, obegin, classes, 1, biggest_class);
230
231 // further clustering always splits biggest class
232 for (size_t l = 2; l < nclusters; l++) {
233 const size_t pos = biggest_class > 0 ? biggest_class - 1 : biggest_class + 1;
234 classes[l] = 0.5 * (classes[biggest_class] + classes[pos]);
235 kmeans_step(ibegin, iend, obegin, classes, l, biggest_class);
236 };
237
238 // some post iteration until centers no longer change
239 for (size_t l = 1; l < 3; l++) {
240 if (kmeans_step(ibegin, iend, obegin, classes, nclusters - 1, biggest_class))
241 break;
242 }
243}
244
246
247#endif
double fabs(const T3DVector< T > &t)
A way to get the norm of a T3DVector using faba syntax.
Definition: 3d/vector.hh:396
#define NS_MIA_BEGIN
conveniance define to start the mia namespace
Definition: defines.hh:33
#define EXPORT_CORE
Macro to manage Visual C++ style dllimport/dllexport.
Definition: defines.hh:101
#define NS_MIA_END
conveniance define to end the mia namespace
Definition: defines.hh:36
static F::result_type accumulate(F &f, const B &data)
Definition: core/filter.hh:371
#define cverb
define a shortcut to the raw output stream
Definition: msgstream.hh:341
vstream & cvinfo()
informal output that may be of interest to understand problems with a program and are of higher prior...
Definition: msgstream.hh:262
void kmeans(InputIterator ibegin, InputIterator iend, OutputIterator obegin, std::vector< double > &classes)
Definition: kmeans.hh:205
bool kmeans_step_with_fixed_centers(InputIterator ibegin, InputIterator iend, OutputIterator obegin, std::vector< double > &classes, const std::vector< bool > &fixed_center, size_t l, int &biggest_class)
Definition: kmeans.hh:111
bool kmeans_step(InputIterator ibegin, InputIterator iend, OutputIterator obegin, std::vector< double > &classes, size_t l, int &biggest_class)
Definition: kmeans.hh:41
int EXPORT_CORE kmeans_get_closest_clustercenter(const std::vector< double > &classes, size_t l, double value)
CDebugSink & cvdebug()
Definition: msgstream.hh:226