This commit is contained in:
2026-05-26 08:30:08 +12:00
parent 005aab8139
commit 135a5a3b83
75 changed files with 22417 additions and 4288 deletions
+360
View File
@@ -0,0 +1,360 @@
#!/usr/bin/env node
// Cleans the legacy Gravity Forms contract CSV into structured JSON, then
// enriches data/legacy-onboarding.json with the owner email + postal address
// (and missing phone) wherever an onboarding row matches a contract row.
//
// Input:
// goodwalk-contract-2026-05-20.csv (repo root)
// data/legacy-onboarding.json (produced by clean-legacy-onboarding.mjs)
//
// Output:
// data/legacy-contracts.json (cleaned contracts)
// data/legacy-onboarding.json (enriched in place)
// data/legacy-clients.json (owner-email-keyed merged view)
//
// Run: node scripts/clean-legacy-contracts.mjs
import { readFileSync, writeFileSync, mkdirSync, existsSync } from 'node:fs';
import { dirname, resolve } from 'node:path';
import { fileURLToPath } from 'node:url';
// All input/output locations are resolved against the repo root (the parent
// of this script's directory), so the script does not depend on the current
// working directory.
const __dirname = dirname(fileURLToPath(import.meta.url));
const ROOT = resolve(__dirname, '..');
const fromRoot = (relativePath) => resolve(ROOT, relativePath);
const CONTRACT_CSV = fromRoot('goodwalk-contract-2026-05-20.csv');
const ONBOARDING_JSON = fromRoot('data/legacy-onboarding.json');
const CONTRACTS_OUT = fromRoot('data/legacy-contracts.json');
const CLIENTS_OUT = fromRoot('data/legacy-clients.json');
/**
 * Minimal CSV reader: splits `text` into rows of string fields.
 *
 * - Double quotes toggle quoted mode; inside quotes, `""` is a literal quote
 *   and commas / newlines are kept as field content.
 * - Outside quotes, `,` ends a field, `\n` ends a row and `\r` is dropped.
 * - A final row without a trailing newline is still emitted, unless it is
 *   completely empty.
 *
 * @param {string} text - Raw CSV content.
 * @returns {string[][]} One array of field strings per row.
 */
function parseCsv(text) {
  const records = [];
  let fields = [];
  let buffer = '';
  let quoted = false;
  let pos = 0;
  while (pos < text.length) {
    const ch = text[pos];
    if (quoted) {
      if (ch !== '"') {
        buffer += ch;
      } else if (text[pos + 1] === '"') {
        // Escaped quote: emit one quote and consume both characters.
        buffer += '"';
        pos += 1;
      } else {
        quoted = false;
      }
    } else {
      switch (ch) {
        case '"':
          quoted = true;
          break;
        case ',':
          fields.push(buffer);
          buffer = '';
          break;
        case '\n':
          fields.push(buffer);
          records.push(fields);
          fields = [];
          buffer = '';
          break;
        case '\r':
          // Carriage returns outside quotes are ignored (CRLF line endings).
          break;
        default:
          buffer += ch;
      }
    }
    pos += 1;
  }
  // Flush the last row when the input does not end with a newline.
  if (buffer.length > 0 || fields.length > 0) {
    fields.push(buffer);
    records.push(fields);
  }
  return records;
}
// Trims a value (null/undefined count as empty) and returns null when nothing
// is left, so blank cells are stored as null rather than ''.
const trimOrNull = (v) => {
  const trimmed = (v ?? '').trim();
  return trimmed === '' ? null : trimmed;
};
// Builds a comparison key: trimmed, lower-cased, with every run of whitespace
// collapsed to a single space. null/undefined produce ''.
const lowerKey = (v) => {
  const trimmed = (v ?? '').trim();
  const lowered = trimmed.toLowerCase();
  return lowered.replace(/\s+/g, ' ');
};
/**
 * Normalizes a free-text phone number to an E.164-style string, assuming New
 * Zealand (+64) for numbers written in national format.
 *
 * Rules, applied to the digits of the input:
 * - leading `+`            -> digits are already international
 * - leading `00`           -> international dialling prefix, dropped
 * - leading `64`           -> NZ country code without the `+`
 * - leading `0`            -> NZ national format; trunk `0` replaced by `64`
 * - anything else          -> cannot be normalized, `e164` is null
 * A trunk zero written after the country code ("+64 (0)21 ...") is removed.
 *
 * @param {unknown} raw - Phone value as exported (may be null/undefined).
 * @returns {{ raw: string|null, e164: string|null }} `raw` is the trimmed
 *   input (null when blank); `e164` is the normalized number or null.
 */
function normalizePhone(raw) {
  // String() so a numeric cell value cannot crash on .trim().
  const original = String(raw ?? '').trim();
  if (!original) return { raw: null, e164: null };

  // "International" means the first character that is a digit or '+' is '+'.
  const hasPlus = original.replace(/[^\d+]/g, '').startsWith('+');
  const digits = original.replace(/\D/g, '');
  // Covers inputs such as "+" or "n/a" that contain no digits at all.
  if (!digits) return { raw: original, e164: null };

  let international;
  if (hasPlus) {
    international = digits;
  } else if (digits.startsWith('00')) {
    international = digits.slice(2);
  } else if (digits.startsWith('64')) {
    international = digits;
  } else if (digits.startsWith('0')) {
    international = `64${digits.slice(1)}`;
  } else {
    return { raw: original, e164: null };
  }

  // "+64 (0)21 ..." style: drop the national trunk zero after the country code.
  if (international.startsWith('640')) {
    international = `64${international.slice(3)}`;
  }
  if (!international) return { raw: original, e164: null };
  return { raw: original, e164: `+${international}` };
}
/**
 * Joins address fragments into a single comma-separated line, in the order
 * street, line2, suburb, city, postal, country.
 *
 * Blank fragments are skipped, and a fragment equal (case-insensitively) to
 * the one just before it is dropped — the suburb sometimes repeats the city.
 *
 * @param {{ street?: string, line2?: string, city?: string, suburb?: string,
 *   postal?: string, country?: string }} parts - Raw address fields.
 * @returns {string|null} The composed address, or null when every field is blank.
 */
function composeAddress({ street, line2, city, suburb, postal, country }) {
  const fragments = [];
  for (const candidate of [street, line2, suburb, city, postal, country]) {
    const piece = (candidate ?? '').trim();
    if (piece === '') continue;
    const previous = fragments[fragments.length - 1];
    // Only adjacent repeats are removed; non-adjacent duplicates stay.
    if (previous !== undefined && previous.toLowerCase() === piece.toLowerCase()) continue;
    fragments.push(piece);
  }
  return fragments.length > 0 ? fragments.join(', ') : null;
}
// -- Parse contract CSV --------------------------------------------------------
// Fail with a clear message when the export is missing, mirroring the check
// done for the onboarding JSON.
if (!existsSync(CONTRACT_CSV)) {
  console.error(`Contract CSV not found at ${CONTRACT_CSV}.`);
  process.exit(1);
}
// Strip a leading byte-order mark so it does not end up glued to the first
// header name. Written as an escape so the character is visible in the source.
const raw = readFileSync(CONTRACT_CSV, 'utf8').replace(/^\uFEFF/, '');
const rows = parseCsv(raw);
if (rows.length === 0) {
  console.error(`Contract CSV at ${CONTRACT_CSV} is empty.`);
  process.exit(1);
}
// First row is the header; remaining rows are data, minus fully blank rows.
const headers = rows[0].map((h) => h.trim());
const data = rows.slice(1).filter((r) => r.some((c) => (c ?? '').trim() !== ''));
// Header name -> column index. With duplicate header names the last one wins.
const idx = Object.fromEntries(headers.map((h, i) => [h, i]));
// Cell lookup by header name; unknown headers and short rows yield ''.
const col = (row, name) => row[idx[name]] ?? '';
// One structured contract record per data row of the CSV.
const contracts = data.map((row) => {
  // Row-bound accessors: `text` returns the raw cell, `clean` the trimmed
  // cell or null when blank.
  const text = (name) => col(row, name);
  const clean = (name) => trimOrNull(col(row, name));

  const firstName = clean('Owners Name (First Name)');
  const middleName = clean('Owners Name (Middle)');
  const lastName = clean('Owners Name (Last Name/Surname)');
  const presentNames = [firstName, middleName, lastName].filter((part) => Boolean(part));
  const fullName = presentNames.join(' ') || null;

  const normalizedPhone = normalizePhone(text('Phone'));

  // Raw address cells, read once and reused for both the composed line and
  // the individual parts.
  const addressCells = {
    street: text('Residential Address (Street Address)'),
    line2: text('Residential Address (Address Line 2)'),
    city: text('Residential Address (City)'),
    suburb: text('Residential Address (Suburb)'),
    postal: text('Residential Address (ZIP / Postal Code)'),
    country: text('Residential Address (Country)'),
  };

  const legacy = {
    entryId: clean('Entry Id'),
    entryDate: clean('Entry Date'),
    dateUpdated: clean('Date Updated'),
    createdByUserId: clean('Created By (User Id)'),
    sourceUrl: clean('Source Url'),
    userAgent: clean('User Agent'),
    userIp: clean('User IP'),
    pdfUrl: clean('PDF: PDF Label'),
    signatureUrl: clean('Owner Signature'),
  };

  const owner = {
    firstName,
    middleName,
    lastName,
    fullName,
    email: clean("Owner's email (Enter Email)"),
    phone: normalizedPhone.e164,
    phoneRaw: normalizedPhone.raw,
    address: composeAddress(addressCells),
    addressParts: {
      street: trimOrNull(addressCells.street),
      line2: trimOrNull(addressCells.line2),
      suburb: trimOrNull(addressCells.suburb),
      city: trimOrNull(addressCells.city),
      postalCode: trimOrNull(addressCells.postal),
      country: trimOrNull(addressCells.country),
    },
  };

  const dog = {
    fullName: clean("Dog's name (include surname)"),
  };

  const consent = {
    // The consent cell counts as ticked only when it reads "checked"
    // (case-insensitive, surrounding whitespace ignored).
    checked: text('Consent (Consent)').trim().toLowerCase() === 'checked',
    text: clean('Consent (Text)'),
    signedOn: clean('Date contract signed'),
  };

  return { legacy, owner, dog, consent };
});
// -- Build contract lookup -----------------------------------------------------
// Some owners appear twice (re-signing) — keep the highest entryId per key.
/**
 * Stores `contract` under `key` unless the map already holds a contract with
 * an equal or higher numeric entry id.
 *
 * Entry ids are compared as numbers; a missing or non-numeric id counts as 0.
 * On a tie the contract already in the map is kept.
 *
 * @param {Map<string, object>} map - Lookup being built (mutated in place).
 * @param {string} key - Lookup key.
 * @param {{ legacy: { entryId: string|null } }} contract - Candidate record.
 * @returns {void}
 */
function keepNewer(map, key, contract) {
  const entryNumber = (record) => Number(record.legacy.entryId) || 0;
  const current = map.get(key);
  if (!current || entryNumber(contract) > entryNumber(current)) {
    map.set(key, contract);
  }
}
// Lookup tables from normalized keys to the newest matching contract.
const byNameKey = new Map(); // "last|first"
const byLastKey = new Map(); // "last"
const byDogKey = new Map(); // dog full name lowercased
const byDogFirstWord = new Map(); // "dogFirstToken|ownerLast" (handles surname mismatches)
contracts.forEach((contract) => {
  const surname = lowerKey(contract.owner.lastName);
  const given = lowerKey(contract.owner.firstName);
  if (surname) {
    if (given) keepNewer(byNameKey, `${surname}|${given}`, contract);
    keepNewer(byLastKey, surname, contract);
  }

  const dogName = contract.dog.fullName;
  if (!dogName) return;
  const dogKey = lowerKey(dogName);
  keepNewer(byDogKey, dogKey, contract);
  // First word of the dog's name paired with the owner's surname (which may
  // be '' when the contract has no surname).
  const [firstToken] = dogKey.split(/\s+/);
  if (firstToken) keepNewer(byDogFirstWord, `${firstToken}|${surname}`, contract);
});
// -- Enrich onboarding ---------------------------------------------------------
if (!existsSync(ONBOARDING_JSON)) {
  console.error(`Onboarding JSON not found at ${ONBOARDING_JSON}. Run clean-legacy-onboarding.mjs first.`);
  process.exit(1);
}
const onboardingPayload = JSON.parse(readFileSync(ONBOARDING_JSON, 'utf8'));
// Everything below iterates `records`; fail clearly if the file has another shape.
if (!Array.isArray(onboardingPayload?.records)) {
  console.error(`Onboarding JSON at ${ONBOARDING_JSON} has no "records" array.`);
  process.exit(1);
}

// Number of contracts per normalized surname, built once so the surname-only
// fallback below does not re-scan every contract for every onboarding record.
const contractsPerSurname = new Map();
for (const c of contracts) {
  const surname = lowerKey(c.owner.lastName);
  if (surname) contractsPerSurname.set(surname, (contractsPerSurname.get(surname) ?? 0) + 1);
}

// Counters reported in the final summary.
let matched = 0;
let backfilledEmail = 0;
let backfilledAddress = 0;
let backfilledPhone = 0;
const unmatched = [];

for (const rec of onboardingPayload.records) {
  const last = lowerKey(rec.owner.lastName);
  const first = lowerKey(rec.owner.firstName);
  const dogFull = lowerKey([rec.dog.name, rec.dog.surname].filter(Boolean).join(' '));
  const dogFirst = lowerKey(rec.dog.name);

  // Try the matching strategies from most to least specific; first hit wins.
  let match = null;
  let matchedBy = null;
  if (last && first && byNameKey.has(`${last}|${first}`)) {
    match = byNameKey.get(`${last}|${first}`);
    matchedBy = 'owner_name';
  } else if (dogFull && byDogKey.has(dogFull)) {
    match = byDogKey.get(dogFull);
    matchedBy = 'dog_full_name';
  } else if (dogFirst && last && byDogFirstWord.has(`${dogFirst}|${last}`)) {
    match = byDogFirstWord.get(`${dogFirst}|${last}`);
    matchedBy = 'dog_first_owner_last';
  } else if (last && contractsPerSurname.get(last) === 1) {
    // Last resort: lone surname match, accepted only when exactly one contract
    // carries that surname. In that case byLastKey holds that single contract.
    match = byLastKey.get(last);
    matchedBy = 'owner_last_only';
  }

  if (!match) {
    unmatched.push({
      onboardingEntryId: rec.legacy.entryId,
      owner: rec.owner.fullName,
      dog: [rec.dog.name, rec.dog.surname].filter(Boolean).join(' '),
    });
    continue;
  }
  matched++;

  // Backfill only fields the onboarding record is missing; existing values
  // are never overwritten.
  if (!rec.owner.email && match.owner.email) {
    rec.owner.email = match.owner.email;
    backfilledEmail++;
  }
  if (!rec.owner.address && match.owner.address) {
    rec.owner.address = match.owner.address;
    rec.owner.addressParts = match.owner.addressParts;
    backfilledAddress++;
  }
  if (!rec.owner.phone && match.owner.phone) {
    rec.owner.phone = match.owner.phone;
    rec.owner.phoneRaw = match.owner.phoneRaw;
    backfilledPhone++;
  }
  // Record which contract matched and by which strategy.
  rec.legacy.contractMatch = {
    entryId: match.legacy.entryId,
    matchedBy,
    signedOn: match.consent.signedOn,
    contractPdfUrl: match.legacy.pdfUrl,
  };
}
// -- Write outputs -------------------------------------------------------------
// Make sure the output directory exists before writing into it.
mkdirSync(dirname(CONTRACTS_OUT), { recursive: true });
writeFileSync(
  CONTRACTS_OUT,
  JSON.stringify({
    exportedAt: new Date().toISOString(),
    source: { file: 'goodwalk-contract-2026-05-20.csv', rows: data.length, columns: headers.length },
    records: contracts,
  }, null, 2) + '\n',
  'utf8',
);

onboardingPayload.enrichedAt = new Date().toISOString();
// The onboarding file is rewritten in place, so a second run reads back the
// notes written by the first. Append the enrichment note only when it is not
// already present, otherwise every run would add another copy.
const enrichmentNote = 'Enriched from goodwalk-contract-2026-05-20.csv: owner email + postal address backfilled where a contract row matched.';
const existingNotes = onboardingPayload.notes ?? [];
onboardingPayload.notes = existingNotes.includes(enrichmentNote)
  ? [...existingNotes]
  : [...existingNotes, enrichmentNote];
writeFileSync(ONBOARDING_JSON, JSON.stringify(onboardingPayload, null, 2) + '\n', 'utf8');
// -- Build a clients view keyed by email ---------------------------------------
// This is the shape that maps most naturally to a Postgres `clients` table.
const clientsByEmail = new Map();
/**
 * Merges `partial` into the client stored under `email` (trimmed and
 * lower-cased), creating the client on first sight. Calls with a blank or
 * missing email are ignored.
 *
 * Merge policy:
 * - scalar fields: first non-null value wins, later values never overwrite;
 * - `onboardingEntryIds` / `contractEntryIds`: appended without duplicates;
 * - `dogs`: matched by case-insensitive name. A new dog is added; a dog that
 *   is already known has only its missing fields filled in, so details from a
 *   later source (e.g. date of birth, breed) are not lost when an earlier
 *   source only knew the name.
 *
 * @param {string|null|undefined} email - Client email used as the key.
 * @param {object} partial - Fields to merge; null/undefined values are skipped.
 * @returns {void}
 */
function upsertClient(email, partial) {
  const key = (email ?? '').toLowerCase().trim();
  if (!key) return;
  const existing = clientsByEmail.get(key) ?? {
    email: key,
    firstName: null,
    lastName: null,
    phone: null,
    phoneRaw: null,
    address: null,
    addressParts: null,
    dogs: [],
    onboardingEntryIds: [],
    contractEntryIds: [],
  };
  for (const [k, v] of Object.entries(partial)) {
    if (v == null) continue;
    if (k === 'dogs') {
      for (const dog of v) {
        const dogKey = (dog.name ?? '').toLowerCase();
        const known = existing.dogs.find((d) => (d.name ?? '').toLowerCase() === dogKey);
        if (!known) {
          // Store a copy so later merges never mutate the caller's object.
          existing.dogs.push({ ...dog });
          continue;
        }
        // Same dog seen again: fill gaps, keep everything already recorded.
        for (const [field, value] of Object.entries(dog)) {
          if (known[field] == null && value != null) known[field] = value;
        }
      }
    } else if (k === 'onboardingEntryIds' || k === 'contractEntryIds') {
      for (const id of v) if (!existing[k].includes(id)) existing[k].push(id);
    } else if (existing[k] == null) {
      existing[k] = v;
    }
  }
  clientsByEmail.set(key, existing);
}
// Seed the clients view from the contracts; rows without an owner email
// cannot be keyed and are skipped.
for (const { owner, dog, legacy } of contracts) {
  if (!owner.email) continue;
  const contractDogs = dog.fullName ? [{ name: dog.fullName, source: 'contract' }] : [];
  const contractIds = legacy.entryId ? [legacy.entryId] : [];
  upsertClient(owner.email, {
    firstName: owner.firstName,
    lastName: owner.lastName,
    phone: owner.phone,
    phoneRaw: owner.phoneRaw,
    address: owner.address,
    addressParts: owner.addressParts,
    dogs: contractDogs,
    contractEntryIds: contractIds,
  });
}
// Then merge in the (already enriched) onboarding records; records that
// still have no owner email are skipped.
for (const { owner, dog, legacy } of onboardingPayload.records) {
  if (!owner.email) continue;
  const dogName = [dog.name, dog.surname].filter(Boolean).join(' ');
  const onboardingDogs = [];
  if (dogName) {
    onboardingDogs.push({
      name: dogName,
      dateOfBirth: dog.dateOfBirth,
      breed: dog.breed,
      source: 'onboarding',
    });
  }
  const onboardingIds = legacy.entryId ? [legacy.entryId] : [];
  upsertClient(owner.email, {
    firstName: owner.firstName,
    lastName: owner.lastName,
    phone: owner.phone,
    phoneRaw: owner.phoneRaw,
    address: owner.address,
    addressParts: owner.addressParts,
    dogs: onboardingDogs,
    onboardingEntryIds: onboardingIds,
  });
}
// Write the merged clients view, ordered by email.
const clientsDocument = {
  exportedAt: new Date().toISOString(),
  note: 'Owner-email-keyed merged view. Maps 1:1 to a Postgres `clients` table; the `dogs` array maps to a `dogs` table with a clients_id FK.',
  clients: Array.from(clientsByEmail.values()).sort((left, right) => left.email.localeCompare(right.email)),
};
writeFileSync(CLIENTS_OUT, `${JSON.stringify(clientsDocument, null, 2)}\n`, 'utf8');
// -- Summary -------------------------------------------------------------------
// Print run statistics as pretty-printed JSON, including up to ten unmatched
// onboarding records and the paths of the files written.
const contractsWithEmail = contracts.reduce(
  (count, contract) => (contract.owner.email ? count + 1 : count),
  0,
);
const summary = {
  contracts: contracts.length,
  contractsWithEmail,
  onboardingRecords: onboardingPayload.records.length,
  matched,
  unmatched: unmatched.length,
  backfilledEmail,
  backfilledAddress,
  backfilledPhone,
  uniqueClientsByEmail: clientsByEmail.size,
  unmatchedSample: unmatched.slice(0, 10),
  outputs: { CONTRACTS_OUT, ONBOARDING_JSON, CLIENTS_OUT },
};
console.log(JSON.stringify(summary, null, 2));