Files
gw-svelte/scripts/clean-legacy-contracts.mjs
T
2026-05-26 08:30:08 +12:00

361 lines
13 KiB
JavaScript
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env node
// Cleans the legacy Gravity Forms contract CSV into structured JSON, then
// enriches data/legacy-onboarding.json with the owner email + postal address
// (and missing phone) wherever an onboarding row matches a contract row.
//
// Input:
// goodwalk-contract-2026-05-20.csv (repo root)
// data/legacy-onboarding.json (produced by clean-legacy-onboarding.mjs)
//
// Output:
// data/legacy-contracts.json (cleaned contracts)
// data/legacy-onboarding.json (enriched in place)
// data/legacy-clients.json (owner-email-keyed merged view)
//
// Run: node scripts/clean-legacy-contracts.mjs
import { readFileSync, writeFileSync, mkdirSync, existsSync } from 'node:fs';
import { dirname, resolve } from 'node:path';
import { fileURLToPath } from 'node:url';
// Every input/output path is resolved against ROOT (the parent of this
// script's directory), so the script does not depend on the caller's cwd.
const __dirname = dirname(fileURLToPath(import.meta.url));
const ROOT = resolve(__dirname, '..');
const fromRoot = (relativePath) => resolve(ROOT, relativePath);
const CONTRACT_CSV = fromRoot('goodwalk-contract-2026-05-20.csv');
const ONBOARDING_JSON = fromRoot('data/legacy-onboarding.json');
const CONTRACTS_OUT = fromRoot('data/legacy-contracts.json');
const CLIENTS_OUT = fromRoot('data/legacy-clients.json');
/**
 * Minimal CSV parser.
 *
 * Fields are separated by `,` and records by `\n`. A `"` toggles quoted mode;
 * inside quotes, `""` is an escaped quote and commas/newlines are kept
 * verbatim. Outside quotes every `\r` is dropped, so CRLF input works.
 *
 * @param {string} text - Full CSV document.
 * @returns {string[][]} One array of field strings per record. A final record
 *   without a trailing newline is still emitted.
 */
function parseCsv(text) {
  const table = [];
  let record = [];
  let field = '';
  let quoted = false;

  const endField = () => {
    record.push(field);
    field = '';
  };
  const endRecord = () => {
    endField();
    table.push(record);
    record = [];
  };

  let pos = 0;
  while (pos < text.length) {
    const ch = text[pos];
    pos += 1;

    if (quoted) {
      if (ch !== '"') {
        field += ch;
      } else if (text[pos] === '"') {
        // Doubled quote inside a quoted field is a literal quote.
        field += '"';
        pos += 1;
      } else {
        quoted = false;
      }
      continue;
    }

    switch (ch) {
      case '"':
        quoted = true;
        break;
      case ',':
        endField();
        break;
      case '\n':
        endRecord();
        break;
      case '\r':
        // Ignored outside quotes.
        break;
      default:
        field += ch;
    }
  }

  // Flush the last record when the input does not end with a newline.
  if (field.length > 0 || record.length > 0) endRecord();
  return table;
}
/** Trims `v`; returns null when `v` is nullish or blank after trimming. */
const trimOrNull = (v) => (v ?? '').trim() || null;
/**
 * Builds a comparison key: trimmed, lower-cased, with each run of whitespace
 * collapsed to a single space. Nullish input yields ''.
 */
const lowerKey = (v) => {
  const text = v ?? '';
  return text.trim().toLowerCase().replace(/\s+/g, ' ');
};
/**
 * Normalises a free-text phone number to E.164, treating numbers written in
 * national format (leading `0`) as belonging to country code 64.
 *
 * Rules, in order:
 *  - blank / nullish input        -> { raw: null, e164: null }
 *  - no digits at all (e.g. "+")  -> e164 null
 *  - leading "+"                  -> digits kept as an international number
 *  - leading "00"                 -> international call prefix, same as "+"
 *  - leading "64"                 -> assumed to already carry the country code
 *  - leading "0"                  -> trunk zero replaced by "64"
 *  - anything else                -> e164 null (cannot be resolved safely)
 * A trunk zero written after the country code ("+64 (0)21 ...") is dropped,
 * because E.164 numbers never include the national trunk prefix.
 *
 * @param {string|null|undefined} raw - Phone number as entered.
 * @returns {{ raw: string|null, e164: string|null }} The trimmed original and
 *   the normalised number (null when it cannot be determined).
 */
function normalizePhone(raw) {
  const original = (raw ?? '').trim();
  if (!original) return { raw: null, e164: null };

  const unresolved = { raw: original, e164: null };
  const compact = original.replace(/[^\d+]/g, '');
  let digits = compact.replace(/\D/g, '');
  let international = compact.startsWith('+');

  // "00" is an international call prefix, not a national trunk zero.
  if (!international && digits.startsWith('00')) {
    digits = digits.slice(2);
    international = true;
  }
  if (!digits) return unresolved;

  if (!international) {
    if (digits.startsWith('0')) {
      digits = `64${digits.slice(1)}`;
    } else if (!digits.startsWith('64')) {
      return unresolved;
    }
  }

  // Drop a trunk zero that was kept after the country code.
  if (digits.startsWith('640')) digits = `64${digits.slice(3)}`;

  return { raw: original, e164: `+${digits}` };
}
/**
 * Joins address fragments into one comma-separated line, in the order
 * street, line2, suburb, city, postal, country.
 *
 * Blank or nullish fragments are skipped. A fragment that equals the one just
 * before it (case-insensitively) is skipped too, since suburb sometimes
 * duplicates city. Non-adjacent duplicates are kept.
 *
 * @returns {string|null} The composed address, or null when nothing remains.
 */
function composeAddress({ street, line2, city, suburb, postal, country }) {
  const fragments = [];
  for (const candidate of [street, line2, suburb, city, postal, country]) {
    const piece = (candidate ?? '').trim();
    if (!piece) continue;

    const previous = fragments[fragments.length - 1];
    const repeatsPrevious =
      previous !== undefined && previous.toLowerCase() === piece.toLowerCase();
    if (!repeatsPrevious) fragments.push(piece);
  }
  return fragments.length > 0 ? fragments.join(', ') : null;
}
// -- Parse contract CSV --------------------------------------------------------
// Strip a leading byte-order mark before parsing. The BOM is written as an
// explicit \uFEFF escape: a literal BOM character is invisible in editors and
// is easily lost on copy/paste, which would silently leave /^/ (a no-op).
const raw = readFileSync(CONTRACT_CSV, 'utf8').replace(/^\uFEFF/, '');
const rows = parseCsv(raw);
if (rows.length === 0) {
  // Without a header row there is nothing to index; fail with a clear message
  // instead of a TypeError on rows[0].
  console.error(`Contract CSV at ${CONTRACT_CSV} is empty; expected a header row.`);
  process.exit(1);
}
const headers = rows[0].map((h) => h.trim());
// Data rows, skipping rows whose cells are all blank.
const data = rows.slice(1).filter((r) => r.some((c) => (c ?? '').trim() !== ''));
// Header name -> column index. If a header name repeats, the last one wins.
const idx = Object.fromEntries(headers.map((h, i) => [h, i]));
// Cell lookup by header name; yields '' for an unknown header or a short row.
const col = (row, name) => row[idx[name]] ?? '';
// One structured contract record per CSV data row. Text cells are trimmed and
// blank values become null; the phone number and postal address are normalised.
const contracts = data.map((row) => {
  const cell = (name) => col(row, name);
  const clean = (name) => trimOrNull(cell(name));

  const nameParts = {
    firstName: clean('Owners Name (First Name)'),
    middleName: clean('Owners Name (Middle)'),
    lastName: clean('Owners Name (Last Name/Surname)'),
  };
  const joinedName = [nameParts.firstName, nameParts.middleName, nameParts.lastName]
    .filter(Boolean)
    .join(' ');
  const { e164: phoneE164, raw: phoneRaw } = normalizePhone(cell('Phone'));

  // Raw address cells are read once and reused for both address outputs.
  const rawAddress = {
    street: cell('Residential Address (Street Address)'),
    line2: cell('Residential Address (Address Line 2)'),
    city: cell('Residential Address (City)'),
    suburb: cell('Residential Address (Suburb)'),
    postal: cell('Residential Address (ZIP / Postal Code)'),
    country: cell('Residential Address (Country)'),
  };

  const legacy = {
    entryId: clean('Entry Id'),
    entryDate: clean('Entry Date'),
    dateUpdated: clean('Date Updated'),
    createdByUserId: clean('Created By (User Id)'),
    sourceUrl: clean('Source Url'),
    userAgent: clean('User Agent'),
    userIp: clean('User IP'),
    pdfUrl: clean('PDF: PDF Label'),
    signatureUrl: clean('Owner Signature'),
  };

  const owner = {
    ...nameParts,
    fullName: joinedName || null,
    email: clean("Owner's email (Enter Email)"),
    phone: phoneE164,
    phoneRaw,
    address: composeAddress(rawAddress),
    addressParts: {
      street: trimOrNull(rawAddress.street),
      line2: trimOrNull(rawAddress.line2),
      suburb: trimOrNull(rawAddress.suburb),
      city: trimOrNull(rawAddress.city),
      postalCode: trimOrNull(rawAddress.postal),
      country: trimOrNull(rawAddress.country),
    },
  };

  const dog = {
    fullName: clean("Dog's name (include surname)"),
  };

  const consent = {
    // True only when the consent cell reads "checked" (case-insensitive).
    checked: cell('Consent (Consent)').trim().toLowerCase() === 'checked',
    text: clean('Consent (Text)'),
    signedOn: clean('Date contract signed'),
  };

  return { legacy, owner, dog, consent };
});
// -- Build contract lookup -----------------------------------------------------
// Some owners appear twice (re-signing) — keep the highest entryId per key.
/**
 * Stores `contract` under `key` unless the map already holds a contract whose
 * numeric entryId is greater than or equal to it. A non-numeric or missing
 * entryId counts as 0, so on a tie the existing entry stays.
 */
function keepNewer(map, key, contract) {
  const entryNumber = (candidate) => Number(candidate.legacy.entryId) || 0;
  const current = map.get(key);
  if (!current || entryNumber(contract) > entryNumber(current)) {
    map.set(key, contract);
  }
}
// Lookup tables over the contracts; keys are normalised with lowerKey and, on
// a key collision, keepNewer decides which contract is retained.
const byNameKey = new Map(); // "last|first"
const byLastKey = new Map(); // "last"
const byDogKey = new Map(); // dog full name lowercased
const byDogFirstWord = new Map(); // "dogFirstToken|ownerLast" (handles surname mismatches)
for (const contract of contracts) {
  const { owner, dog } = contract;
  const last = lowerKey(owner.lastName);
  const first = lowerKey(owner.firstName);

  if (last) {
    keepNewer(byLastKey, last, contract);
    if (first) keepNewer(byNameKey, `${last}|${first}`, contract);
  }

  if (!dog.fullName) continue;
  const dogKey = lowerKey(dog.fullName);
  keepNewer(byDogKey, dogKey, contract);
  const [firstToken] = dogKey.split(/\s+/);
  if (firstToken) keepNewer(byDogFirstWord, `${firstToken}|${last}`, contract);
}
// -- Enrich onboarding ---------------------------------------------------------
if (!existsSync(ONBOARDING_JSON)) {
  console.error(`Onboarding JSON not found at ${ONBOARDING_JSON}. Run clean-legacy-onboarding.mjs first.`);
  process.exit(1);
}
const onboardingPayload = JSON.parse(readFileSync(ONBOARDING_JSON, 'utf8'));
if (!Array.isArray(onboardingPayload?.records)) {
  // Fail with a clear message rather than a "not iterable" TypeError below.
  console.error(`Onboarding JSON at ${ONBOARDING_JSON} has no "records" array. Re-run clean-legacy-onboarding.mjs.`);
  process.exit(1);
}

// Counters reported in the final summary.
let matched = 0;
let backfilledEmail = 0;
let backfilledAddress = 0;
let backfilledPhone = 0;
const unmatched = [];

// Number of contract rows per normalised surname, computed once so the
// surname-only fallback does not rescan every contract for every record.
const contractsPerSurname = new Map();
for (const c of contracts) {
  const surname = lowerKey(c.owner.lastName);
  if (surname) contractsPerSurname.set(surname, (contractsPerSurname.get(surname) ?? 0) + 1);
}

for (const rec of onboardingPayload.records) {
  const last = lowerKey(rec.owner.lastName);
  const first = lowerKey(rec.owner.firstName);
  const dogFull = lowerKey([rec.dog.name, rec.dog.surname].filter(Boolean).join(' '));
  const dogFirst = lowerKey(rec.dog.name);

  // Matching strategies, tried from most to least specific; the first hit wins.
  let match = null;
  let matchedBy = null;
  if (last && first && byNameKey.has(`${last}|${first}`)) {
    match = byNameKey.get(`${last}|${first}`);
    matchedBy = 'owner_name';
  } else if (dogFull && byDogKey.has(dogFull)) {
    match = byDogKey.get(dogFull);
    matchedBy = 'dog_full_name';
  } else if (dogFirst && last && byDogFirstWord.has(`${dogFirst}|${last}`)) {
    match = byDogFirstWord.get(`${dogFirst}|${last}`);
    matchedBy = 'dog_first_owner_last';
  } else if (last && contractsPerSurname.get(last) === 1) {
    // Last resort: lone surname match, accepted only when exactly one contract
    // row carries that surname. In that case byLastKey holds that very row.
    match = byLastKey.get(last);
    matchedBy = 'owner_last_only';
  }

  if (!match) {
    unmatched.push({
      onboardingEntryId: rec.legacy.entryId,
      owner: rec.owner.fullName,
      dog: [rec.dog.name, rec.dog.surname].filter(Boolean).join(' '),
    });
    continue;
  }
  matched++;

  // Backfill only fields the onboarding record lacks; existing values are
  // never overwritten.
  if (!rec.owner.email && match.owner.email) {
    rec.owner.email = match.owner.email;
    backfilledEmail++;
  }
  if (!rec.owner.address && match.owner.address) {
    rec.owner.address = match.owner.address;
    rec.owner.addressParts = match.owner.addressParts;
    backfilledAddress++;
  }
  if (!rec.owner.phone && match.owner.phone) {
    rec.owner.phone = match.owner.phone;
    rec.owner.phoneRaw = match.owner.phoneRaw;
    backfilledPhone++;
  }

  // Record which contract was used and how it was found.
  rec.legacy.contractMatch = {
    entryId: match.legacy.entryId,
    matchedBy,
    signedOn: match.consent.signedOn,
    contractPdfUrl: match.legacy.pdfUrl,
  };
}
// -- Write outputs -------------------------------------------------------------
// Ensure the output directory exists, then write the cleaned contracts as
// pretty-printed JSON with a trailing newline.
mkdirSync(dirname(CONTRACTS_OUT), { recursive: true });
const contractsPayload = {
  exportedAt: new Date().toISOString(),
  source: { file: 'goodwalk-contract-2026-05-20.csv', rows: data.length, columns: headers.length },
  records: contracts,
};
writeFileSync(CONTRACTS_OUT, `${JSON.stringify(contractsPayload, null, 2)}\n`, 'utf8');
// Stamp the onboarding payload and write it back in place. The note is added
// only if it is not already present: the file is rewritten in place, so
// re-running the script would otherwise append a duplicate note each time.
onboardingPayload.enrichedAt = new Date().toISOString();
const enrichmentNote =
  'Enriched from goodwalk-contract-2026-05-20.csv: owner email + postal address backfilled where a contract row matched.';
const priorNotes = onboardingPayload.notes ?? [];
onboardingPayload.notes = priorNotes.includes(enrichmentNote)
  ? priorNotes
  : [...priorNotes, enrichmentNote];
writeFileSync(ONBOARDING_JSON, JSON.stringify(onboardingPayload, null, 2) + '\n', 'utf8');
// -- Build a clients view keyed by email ---------------------------------------
// This is the shape that maps most naturally to a Postgres `clients` table.
const clientsByEmail = new Map();

/**
 * Merges `partial` into the client record keyed by the lower-cased, trimmed
 * email, creating the record on first sight. Does nothing for a blank email.
 *
 * Merge rules:
 *  - scalar fields: the first non-null value wins; later values are ignored.
 *  - `onboardingEntryIds` / `contractEntryIds`: appended without duplicates.
 *  - `dogs`: matched by case-insensitive name. A new dog is appended as a
 *    copy; for a known dog, fields that are still null/missing are filled in
 *    from the incoming entry, so details that only one source carries (such
 *    as breed or date of birth) are not lost when another source listed the
 *    dog first.
 *
 * @param {string|null|undefined} email - Owner email (the merge key).
 * @param {object} partial - Fields to merge; null/undefined values are skipped.
 */
function upsertClient(email, partial) {
  const key = (email ?? '').toLowerCase().trim();
  if (!key) return;

  const existing = clientsByEmail.get(key) ?? {
    email: key,
    firstName: null,
    lastName: null,
    phone: null,
    phoneRaw: null,
    address: null,
    addressParts: null,
    dogs: [],
    onboardingEntryIds: [],
    contractEntryIds: [],
  };

  for (const [k, v] of Object.entries(partial)) {
    if (v == null) continue;

    if (k === 'dogs') {
      for (const dog of v) {
        const dogKey = (dog.name ?? '').toLowerCase();
        const known = existing.dogs.find((d) => (d.name ?? '').toLowerCase() === dogKey);
        if (!known) {
          // Copy so later merges never mutate the caller's object.
          existing.dogs.push({ ...dog });
          continue;
        }
        for (const [field, value] of Object.entries(dog)) {
          if (known[field] == null && value != null) known[field] = value;
        }
      }
    } else if (k === 'onboardingEntryIds' || k === 'contractEntryIds') {
      for (const id of v) {
        if (!existing[k].includes(id)) existing[k].push(id);
      }
    } else if (existing[k] == null) {
      existing[k] = v;
    }
  }

  clientsByEmail.set(key, existing);
}
// Seed the clients view from contracts; rows without an owner email are skipped.
for (const { owner, dog, legacy } of contracts) {
  if (!owner.email) continue;
  const { email, firstName, lastName, phone, phoneRaw, address, addressParts } = owner;
  const dogs = dog.fullName ? [{ name: dog.fullName, source: 'contract' }] : [];
  const contractEntryIds = legacy.entryId ? [legacy.entryId] : [];
  upsertClient(email, {
    firstName,
    lastName,
    phone,
    phoneRaw,
    address,
    addressParts,
    dogs,
    contractEntryIds,
  });
}
// Merge onboarding records into the clients view; records without an owner
// email are skipped.
for (const { owner, dog, legacy } of onboardingPayload.records) {
  if (!owner.email) continue;
  const { email, firstName, lastName, phone, phoneRaw, address, addressParts } = owner;

  const dogName = [dog.name, dog.surname].filter(Boolean).join(' ');
  const dogs = [];
  if (dogName) {
    dogs.push({
      name: dogName,
      dateOfBirth: dog.dateOfBirth,
      breed: dog.breed,
      source: 'onboarding',
    });
  }
  const onboardingEntryIds = legacy.entryId ? [legacy.entryId] : [];

  upsertClient(email, {
    firstName,
    lastName,
    phone,
    phoneRaw,
    address,
    addressParts,
    dogs,
    onboardingEntryIds,
  });
}
// Write the merged clients view, sorted by email, as pretty-printed JSON with
// a trailing newline.
const clientsPayload = {
  exportedAt: new Date().toISOString(),
  note: 'Owner-email-keyed merged view. Maps 1:1 to a Postgres `clients` table; the `dogs` array maps to a `dogs` table with a clients_id FK.',
  clients: [...clientsByEmail.values()].sort((a, b) => a.email.localeCompare(b.email)),
};
writeFileSync(CLIENTS_OUT, `${JSON.stringify(clientsPayload, null, 2)}\n`, 'utf8');
// -- Summary -------------------------------------------------------------------
// Print run statistics as JSON; at most 10 unmatched records are shown.
const summary = {
  contracts: contracts.length,
  contractsWithEmail: contracts.reduce((count, c) => (c.owner.email ? count + 1 : count), 0),
  onboardingRecords: onboardingPayload.records.length,
  matched,
  unmatched: unmatched.length,
  backfilledEmail,
  backfilledAddress,
  backfilledPhone,
  uniqueClientsByEmail: clientsByEmail.size,
  unmatchedSample: unmatched.slice(0, 10),
  outputs: { CONTRACTS_OUT, ONBOARDING_JSON, CLIENTS_OUT },
};
console.log(JSON.stringify(summary, null, 2));