libStatGen Software 1
PedigreeFamily.cpp
1/*
2 * Copyright (C) 2010 Regents of the University of Michigan
3 *
4 * This program is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation, either version 3 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18#include "Pedigree.h"
19#include "Constant.h"
20#include "MathConstant.h"
21#include "Error.h"
22
23#include <stdlib.h>
24#include <ctype.h>
25#include <string.h>
26#include <limits.h>
27
28Family::Family(Pedigree & pedigree, int _first, int _last, int _serial) :
29 ped(pedigree)
30{
31 serial = _serial;
32 first = _first;
33 last = _last;
34 count = last - first + 1;
35 path = new int [count];
36 famid = ped[first].famid;
37
38 founders = mzTwins = 0;
39
40 for (int i=first; i<=last; i++)
41 if (ped[i].isFounder())
42 {
43 ped[i].traverse = founders;
44 path[founders++] = ped[i].serial;
45 }
46 else
47 {
48 ped[i].traverse = -1;
49 if (ped[i].isMzTwin(ped[i]))
50 for (int j = first; j < i; j++)
51 if (ped[i].isMzTwin(ped[j]))
52 {
53 mzTwins++;
54 break;
55 }
56 }
57
58 nonFounders = count - founders;
59 generations = nonFounders == 0 ? 1 : 2;
60
61 int next = founders;
62 while (next < count)
63 {
64 bool check = false;
65
66 // Create traversal where path ancestors precede their offspring
67 for (int i=first; i<=last; i++)
68 if (ped[i].traverse == -1)
69 {
70 int fatherSerial = ped[i].father->traverse;
71 int motherSerial = ped[i].mother->traverse;
72
73 if (fatherSerial >= 0 && motherSerial >= 0)
74 {
75 check = true;
76
77 ped[i].traverse = next;
78 path[next++] = i;
79
80 if (fatherSerial >= founders || motherSerial >= founders)
81 generations = 3;
82
83 // If this individual is part of a set of MZ twins
84 if (ped[i].zygosity & 1)
85 for (int j = 0; j < ped[i].sibCount; j++)
86 {
87 Person & sib = *ped[i].sibs[j];
88
89 // Insert all co-twins at the same position in traversal
90 // order
91 if (sib.traverse == -1 && ped[i].zygosity == sib.zygosity)
92 {
93 sib.traverse = next;
94 path[next++] = sib.serial;
95 }
96 }
97 }
98 }
99
100 if (!check) ShowInvalidCycles();
101 }
102}
103
104Family::~Family()
105{
106 delete [] path;
107}
108
109void Family::ShowInvalidCycles()
110{
111 // Try and identify key individuals responsible for
112 // pedigree mess-up ... when this function is called
113 // pedigree has been traversed top-down and individuals
114 // that are correctly specified have IDs of >= 0.
115
116 // This routine traverses the pedigree bottom up to
117 // identify a subset of individuals likely to be causing
118 // the problem
119 IntArray descendants(ped.count);
120 descendants.Zero();
121
122 for (int i = first; i <= last; i++)
123 if (ped[i].traverse == -1)
124 {
125 descendants[ped[i].father->serial]++;
126 descendants[ped[i].mother->serial]++;
127 }
128
129 IntArray stack;
130
131 for (int i = first; i <= last; i++)
132 if (ped[i].traverse == -1 && descendants[i] == 0)
133 {
134 stack.Push(i);
135
136 do
137 {
138 int j = stack.Pop();
139
140 if (ped[j].traverse != -1) continue;
141
142 ped[j].traverse = 9999;
143
144 if (--descendants[ped[j].father->serial] == 0)
145 stack.Push(ped[j].father->serial);
146 if (--descendants[ped[j].mother->serial] == 0)
147 stack.Push(ped[j].mother->serial);
148 }
149 while (stack.Length());
150 }
151
152 printf("The structure of family %s requires\n"
153 "an individual to be his own ancestor.\n\n"
154 "To identify the problem(s), examine the\n"
155 "following key individuals:\n\n",
156 (const char *) famid);
157
158 for (int i = first; i <= last; i++)
159 if (ped[i].traverse == -1)
160 printf("Problem Person: %s\n", (const char *) ped[i].pid);
161
162 error("Invalid pedigree structure.");
163}
164
165int Family::ConnectedGroups(IntArray * groupMembership)
166{
167 IntArray groups(count);
168
169 // Use the quick union algorithm to identify connected groups
170 groups.SetSequence(0, 1);
171 for (int i = count - 1; i >= founders; i--)
172 {
173 // Lookup parents
174 int group0 = i;
175 int group1 = ped[path[i]].father->traverse;
176 int group2 = ped[path[i]].mother->traverse;
177
178 // Identify their corresponding groupings
179 while (groups[group0] != group0) group0 = groups[group0];
180 while (groups[group1] != group1) group1 = groups[group1];
181 while (groups[group2] != group2) group2 = groups[group2];
182
183 int group = group1 < group2 ? group1 : group2;
184 if (group0 < group) group = group0;
185
186 groups[group0] = groups[group1] = groups[group2] = group;
187 }
188
189 // Count groupings
190 int groupCount = 0;
191 for (int i = 0; i < founders; i++)
192 if (groups[i] == i)
193 groupCount++;
194
195 if (groupMembership == NULL)
196 return groupCount;
197
198 // Flatten tree so all items point to root
199 for (int i = 1; i < count; i++)
200 groups[i] = groups[groups[i]];
201
202 // Update group membership info
203 int group = 0;
204 groupMembership->Dimension(count);
205 for (int i = 0; i < count; i++)
206 if (groups[i] == i)
207 (*groupMembership)[i] = ++group;
208 else
209 (*groupMembership)[i] = (*groupMembership)[groups[i]];
210
211#if 0
212 // This stretch of code outputs family structure and group membership
213 // And should usually be commented out!
214 for (int j = first; j <= last; j++)
215 printf("%s %s %s %s %d %d\n",
216 (const char *) famid, (const char *) ped[j].pid,
217 (const char *) ped[j].fatid, (const char *) ped[j].motid,
218 ped[j].sex, groups[ped[j].traverse]);
219#endif
220
221 return groupCount;
222}
223
224/*
225int Family::ConnectedGroups(IntArray * groupMembership)
226 {
227 IntArray * stack = new IntArray[count];
228 IntArray groups(count);
229
230 groups.Zero();
231
232 int group = 0;
233 int seed = count - 1;
234
235 // Search for connected sets of individuals until everyone is accounted for
236 while (true)
237 {
238 while ((seed >= 0) && (groups[seed] != 0))
239 seed--;
240
241 if (seed == -1)
242 break;
243
244 Mark(seed, ++group, stack, groups);
245
246 for (int j = seed; j >= founders; j--)
247 if (groups[j] == 0)
248 {
249 int fat_j = ped[path[j]].father->traverse;
250 int mot_j = ped[path[j]].mother->traverse;
251
252 if (groups[fat_j] == group || groups[mot_j] == group)
253 Mark(j, group, stack, groups);
254 else
255 stack[mot_j].Push(j),
256 stack[fat_j].Push(j);
257 }
258
259 for (int j = 0; j < count; j++)
260 stack[j].Clear();
261 }
262
263 if (groupMembership != NULL)
264 (*groupMembership) = groups;
265
266 // This stretch of code outputs family structure and group membership
267 // And should usually be commented out!
268#if 0
269 for (int j = first; j <= last; j++)
270 printf("%s %s %s %s %d %d\n",
271 (const char *) famid, (const char *) ped[j].pid,
272 (const char *) ped[j].fatid, (const char *) ped[j].motid,
273 ped[j].sex, groups[ped[j].traverse]);
274#endif
275
276 delete [] stack;
277
278 return group;
279 }
280
281void Family::Mark(int j, int group, IntArray * stack, IntArray & groups)
282 {
283 if (groups[j] == group) return;
284
285 groups[j] = group;
286
287 while (stack[j].Length())
288 Mark(stack[j].Pop(), group, stack, groups);
289
290 if (j < founders) return;
291
292 Mark(ped[path[j]].father->traverse, group, stack, groups);
293 Mark(ped[path[j]].mother->traverse, group, stack, groups);
294 }
295*/