#!/usr/bin/env node
// Cleans the legacy Gravity Forms contract CSV into structured JSON, then
// enriches data/legacy-onboarding.json with the owner email + postal address
// (and missing phone) wherever an onboarding row matches a contract row.
//
// Input:
//   goodwalk-contract-2026-05-20.csv   (repo root)
//   data/legacy-onboarding.json        (produced by clean-legacy-onboarding.mjs)
//
// Output:
//   data/legacy-contracts.json         (cleaned contracts)
//   data/legacy-onboarding.json        (enriched in place)
//   data/legacy-clients.json           (owner-email-keyed merged view)
//
// Run: node scripts/clean-legacy-contracts.mjs

import { readFileSync, writeFileSync, mkdirSync, existsSync } from 'node:fs';
import { dirname, resolve } from 'node:path';
import { fileURLToPath } from 'node:url';

// ES modules have no built-in __dirname; derive it from this module's URL.
const __dirname = dirname(fileURLToPath(import.meta.url));
// All input/output paths are resolved relative to the parent of this script's directory.
const ROOT = resolve(__dirname, '..');
const CONTRACT_CSV = resolve(ROOT, 'goodwalk-contract-2026-05-20.csv');
const ONBOARDING_JSON = resolve(ROOT, 'data/legacy-onboarding.json');
const CONTRACTS_OUT = resolve(ROOT, 'data/legacy-contracts.json');
const CLIENTS_OUT = resolve(ROOT, 'data/legacy-clients.json');

/**
 * Minimal CSV parser: comma-separated fields, double-quoted fields that may
 * contain commas and line breaks, and `""` as an escaped quote inside a
 * quoted field.
 *
 * Outside quotes, `\n` ends a record and `\r` is dropped, so both LF and CRLF
 * files parse the same way. Inside quotes every character is kept verbatim.
 *
 * @param {string} text - Full CSV text.
 * @returns {string[][]} One array of cell strings per record, in file order.
 */
function parseCsv(text) {
  const rows = [];
  let row = [];
  let field = '';
  let inQuotes = false;
  // Set once the current field has seen an opening quote, so an explicitly
  // quoted empty field ("") at end-of-input still counts as a field.
  let quoted = false;

  const endField = () => {
    row.push(field);
    field = '';
    quoted = false;
  };
  const endRow = () => {
    endField();
    rows.push(row);
    row = [];
  };

  for (let i = 0; i < text.length; i++) {
    const ch = text[i];
    if (inQuotes) {
      if (ch !== '"') {
        field += ch;
      } else if (text[i + 1] === '"') {
        // Doubled quote inside a quoted field is a literal quote.
        field += '"';
        i++;
      } else {
        inQuotes = false;
      }
      continue;
    }
    if (ch === '"') {
      inQuotes = true;
      quoted = true;
    } else if (ch === ',') {
      endField();
    } else if (ch === '\n') {
      endRow();
    } else if (ch !== '\r') {
      field += ch;
    }
  }

  // Flush the last record when the input does not end with a newline.
  if (field.length || row.length || quoted) endRow();
  return rows;
}

/**
 * Trims a value and maps "nothing there" to null.
 *
 * @param {*} v - Usually a CSV cell string; null/undefined are treated as empty.
 *   Other non-string values are stringified first instead of throwing.
 * @returns {string|null} The trimmed text, or null when it is empty.
 */
const trimOrNull = (v) => {
  const trimmed = String(v ?? '').trim();
  return trimmed === '' ? null : trimmed;
};

/**
 * Builds a case- and spacing-insensitive comparison key: trimmed, lowercased,
 * with every run of whitespace collapsed to a single space.
 *
 * @param {*} v - Value to normalise; null/undefined yield ''. Non-string
 *   values are stringified first instead of throwing.
 * @returns {string} The normalised key ('' when there is nothing to key on).
 */
const lowerKey = (v) => String(v ?? '').trim().toLowerCase().replace(/\s+/g, ' ');

/**
 * Normalises a free-text phone number to an E.164-style string, defaulting to
 * the +64 country code for numbers written in national (leading 0) format.
 *
 * Rules, in order:
 *  - blank input                -> { raw: null, e164: null }
 *  - no digits at all           -> e164 null (a lone "+" is not a number)
 *  - starts with "+"            -> "+" followed by all digits
 *  - starts with "00"           -> international call prefix, replaced by "+"
 *  - starts with "64"           -> assumed to already carry the country code
 *  - starts with "0"            -> national format, "0" replaced by "+64"
 *  - anything else              -> e164 null (cannot be interpreted safely)
 *
 * @param {*} raw - Phone text as entered; null/undefined are treated as blank.
 * @returns {{ raw: string|null, e164: string|null }} The trimmed original and
 *   its normalised form (null when it cannot be normalised).
 */
function normalizePhone(raw) {
  const original = String(raw ?? '').trim();
  if (!original) return { raw: null, e164: null };

  const digits = original.replace(/\D/g, '');
  if (!digits) return { raw: original, e164: null };

  // Ignore punctuation/spaces when checking for a leading "+".
  const compact = original.replace(/[^\d+]/g, '');
  if (compact.startsWith('+')) return { raw: original, e164: `+${digits}` };

  if (digits.startsWith('00')) {
    const international = digits.slice(2);
    return { raw: original, e164: international ? `+${international}` : null };
  }
  if (digits.startsWith('64')) return { raw: original, e164: `+${digits}` };
  if (digits.startsWith('0')) return { raw: original, e164: `+64${digits.slice(1)}` };

  return { raw: original, e164: null };
}

/**
 * Joins address parts into a single comma-separated line in the order
 * street, line2, suburb, city, postal, country. Blank parts are skipped and
 * a part that repeats the one immediately before it (case-insensitively) is
 * dropped, because the suburb sometimes duplicates the city.
 *
 * @param {object} [parts] - Address parts; each may be a string, null or
 *   undefined. Non-string values (e.g. a numeric postal code) are stringified.
 * @returns {string|null} The composed address, or null when every part is blank.
 */
function composeAddress({ street, line2, city, suburb, postal, country } = {}) {
  const fragments = [street, line2, suburb, city, postal, country]
    .map((part) => String(part ?? '').trim())
    .filter(Boolean);

  // Only adjacent repeats are removed; the same text further apart is kept.
  const unique = fragments.filter(
    (part, i) => i === 0 || fragments[i - 1].toLowerCase() !== part.toLowerCase(),
  );

  return unique.length ? unique.join(', ') : null;
}

// -- Parse contract CSV --------------------------------------------------------
// Strip a leading byte-order mark (U+FEFF) if the file has one, so it does not
// end up glued to the first header name and break the name-based lookups below.
// The escape is spelled out because the raw character is invisible in source.
const raw = readFileSync(CONTRACT_CSV, 'utf8').replace(/^\uFEFF/, '');
const rows = parseCsv(raw);
if (rows.length === 0) {
  console.error(`Contract CSV at ${CONTRACT_CSV} is empty; expected a header row.`);
  process.exit(1);
}
const headers = rows[0].map((h) => h.trim());
// Data rows: everything after the header, minus rows whose cells are all blank.
const data = rows.slice(1).filter((r) => r.some((c) => (c ?? '').trim() !== ''));
// Header name -> column index. If a header name repeats, the last column wins.
const idx = Object.fromEntries(headers.map((h, i) => [h, i]));
// Cell lookup by header name; unknown headers and short rows yield ''.
const col = (row, name) => row[idx[name]] ?? '';

// One structured record per CSV data row. Every text field is trimmed and
// blank cells become null; the address columns are read once and reused for
// both the structured parts and the composed single-line address.
const contracts = data.map((row) => {
  const cell = (name) => trimOrNull(col(row, name));

  const first = cell('Owners Name (First Name)');
  const middle = cell('Owners Name (Middle)');
  const last = cell('Owners Name (Last Name/Surname)');
  const fullName = [first, middle, last].filter(Boolean).join(' ') || null;
  const phone = normalizePhone(col(row, 'Phone'));

  const addressParts = {
    street: cell('Residential Address (Street Address)'),
    line2: cell('Residential Address (Address Line 2)'),
    suburb: cell('Residential Address (Suburb)'),
    city: cell('Residential Address (City)'),
    postalCode: cell('Residential Address (ZIP / Postal Code)'),
    country: cell('Residential Address (Country)'),
  };

  return {
    legacy: {
      entryId: cell('Entry Id'),
      entryDate: cell('Entry Date'),
      dateUpdated: cell('Date Updated'),
      createdByUserId: cell('Created By (User Id)'),
      sourceUrl: cell('Source Url'),
      userAgent: cell('User Agent'),
      userIp: cell('User IP'),
      pdfUrl: cell('PDF: PDF Label'),
      signatureUrl: cell('Owner Signature'),
    },
    owner: {
      firstName: first,
      middleName: middle,
      lastName: last,
      fullName,
      email: cell("Owner's email (Enter Email)"),
      phone: phone.e164,
      phoneRaw: phone.raw,
      address: composeAddress({
        street: addressParts.street,
        line2: addressParts.line2,
        city: addressParts.city,
        suburb: addressParts.suburb,
        postal: addressParts.postalCode,
        country: addressParts.country,
      }),
      addressParts,
    },
    dog: {
      fullName: cell("Dog's name (include surname)"),
    },
    consent: {
      // The consent column holds the literal text "checked" when ticked.
      checked: col(row, 'Consent (Consent)').trim().toLowerCase() === 'checked',
      text: cell('Consent (Text)'),
      signedOn: cell('Date contract signed'),
    },
  };
});

// -- Build contract lookup -----------------------------------------------------
// Some owners appear twice (re-signing) — keep the highest entryId per key.

/**
 * Stores `contract` under `key` unless the map already holds a contract with
 * an equal or higher numeric entry id.
 *
 * Entry ids are compared as numbers; a missing or non-numeric id counts as 0.
 * On a tie the contract already in the map is kept.
 *
 * @param {Map<string, object>} map - Lookup to update in place.
 * @param {string} key - Lookup key.
 * @param {{ legacy: { entryId: * } }} contract - Candidate contract record.
 * @returns {void}
 */
function keepNewer(map, key, contract) {
  const existing = map.get(key);
  if (!existing) {
    map.set(key, contract);
    return;
  }
  const incomingId = Number(contract.legacy.entryId) || 0;
  const currentId = Number(existing.legacy.entryId) || 0;
  if (incomingId > currentId) map.set(key, contract);
}

// Lookup tables from normalised keys to the newest matching contract.
const byNameKey = new Map();      // "ownerLast|ownerFirst"
const byLastKey = new Map();      // "ownerLast"
const byDogKey = new Map();       // normalised dog full name
const byDogFirstWord = new Map(); // "dogFirstToken|ownerLast" (handles dog-surname mismatches)

for (const c of contracts) {
  const last = lowerKey(c.owner.lastName);
  const first = lowerKey(c.owner.firstName);
  if (last && first) keepNewer(byNameKey, `${last}|${first}`, c);
  if (last) keepNewer(byLastKey, last, c);

  if (!c.dog.fullName) continue;
  const dogKey = lowerKey(c.dog.fullName);
  keepNewer(byDogKey, dogKey, c);
  const [firstToken] = dogKey.split(/\s+/);
  // This table is only ever queried with a non-empty owner surname, so a key
  // with an empty surname half could never be looked up; don't store one.
  if (firstToken && last) keepNewer(byDogFirstWord, `${firstToken}|${last}`, c);
}

// -- Enrich onboarding ---------------------------------------------------------
if (!existsSync(ONBOARDING_JSON)) {
  console.error(`Onboarding JSON not found at ${ONBOARDING_JSON}. Run clean-legacy-onboarding.mjs first.`);
  process.exit(1);
}
const onboardingPayload = JSON.parse(readFileSync(ONBOARDING_JSON, 'utf8'));
if (!Array.isArray(onboardingPayload?.records)) {
  console.error(`Onboarding JSON at ${ONBOARDING_JSON} has no "records" array. Re-run clean-legacy-onboarding.mjs.`);
  process.exit(1);
}

// Counters reported in the run summary.
let matched = 0;
let backfilledEmail = 0;
let backfilledAddress = 0;
let backfilledPhone = 0;
const unmatched = [];

// How many contracts share each normalised owner surname. Computed once so the
// surname-only fallback below does not rescan every contract per record.
const surnameCounts = new Map();
for (const c of contracts) {
  const surname = lowerKey(c.owner.lastName);
  if (surname) surnameCounts.set(surname, (surnameCounts.get(surname) ?? 0) + 1);
}

for (const rec of onboardingPayload.records) {
  const last = lowerKey(rec.owner.lastName);
  const first = lowerKey(rec.owner.firstName);
  const dogFull = lowerKey([rec.dog.name, rec.dog.surname].filter(Boolean).join(' '));
  const dogFirst = lowerKey(rec.dog.name);

  // Try the matching strategies from most to least specific; first hit wins.
  let match = null;
  let matchedBy = null;
  if (last && first && byNameKey.has(`${last}|${first}`)) {
    match = byNameKey.get(`${last}|${first}`);
    matchedBy = 'owner_name';
  } else if (dogFull && byDogKey.has(dogFull)) {
    match = byDogKey.get(dogFull);
    matchedBy = 'dog_full_name';
  } else if (dogFirst && last && byDogFirstWord.has(`${dogFirst}|${last}`)) {
    match = byDogFirstWord.get(`${dogFirst}|${last}`);
    matchedBy = 'dog_first_owner_last';
  } else if (last && surnameCounts.get(last) === 1) {
    // Last resort: lone surname match, accepted only when exactly one contract
    // carries that surname (so byLastKey holds that single contract).
    match = byLastKey.get(last);
    matchedBy = 'owner_last_only';
  }

  if (!match) {
    unmatched.push({
      onboardingEntryId: rec.legacy.entryId,
      owner: rec.owner.fullName,
      dog: [rec.dog.name, rec.dog.surname].filter(Boolean).join(' '),
    });
    continue;
  }

  matched++;
  // Backfill only: values already present on the onboarding record are kept.
  if (!rec.owner.email && match.owner.email) {
    rec.owner.email = match.owner.email;
    backfilledEmail++;
  }
  if (!rec.owner.address && match.owner.address) {
    rec.owner.address = match.owner.address;
    rec.owner.addressParts = match.owner.addressParts;
    backfilledAddress++;
  }
  if (!rec.owner.phone && match.owner.phone) {
    rec.owner.phone = match.owner.phone;
    rec.owner.phoneRaw = match.owner.phoneRaw;
    backfilledPhone++;
  }
  // Record which contract was used and how it was found.
  rec.legacy.contractMatch = {
    entryId: match.legacy.entryId,
    matchedBy,
    signedOn: match.consent.signedOn,
    contractPdfUrl: match.legacy.pdfUrl,
  };
}

// -- Write outputs -------------------------------------------------------------
mkdirSync(dirname(CONTRACTS_OUT), { recursive: true });

// Cleaned contracts, with a small provenance header.
writeFileSync(
  CONTRACTS_OUT,
  JSON.stringify({
    exportedAt: new Date().toISOString(),
    source: { file: 'goodwalk-contract-2026-05-20.csv', rows: data.length, columns: headers.length },
    records: contracts,
  }, null, 2) + '\n',
  'utf8',
);

// Onboarding file is rewritten in place with the enriched records.
onboardingPayload.enrichedAt = new Date().toISOString();
const ENRICHMENT_NOTE = 'Enriched from goodwalk-contract-2026-05-20.csv: owner email + postal address backfilled where a contract row matched.';
const priorNotes = onboardingPayload.notes ?? [];
// Append the note only once so re-running the script does not pile up duplicates.
onboardingPayload.notes = priorNotes.includes(ENRICHMENT_NOTE)
  ? [...priorNotes]
  : [...priorNotes, ENRICHMENT_NOTE];
writeFileSync(ONBOARDING_JSON, JSON.stringify(onboardingPayload, null, 2) + '\n', 'utf8');

// -- Build a clients view keyed by email ---------------------------------------
// This is the shape that maps most naturally to a Postgres `clients` table.
const clientsByEmail = new Map();

/**
 * Adds `incoming` to `dogs`, or, when a dog with the same name
 * (case-insensitive) is already listed, fills in only the fields that dog is
 * still missing. Existing values, including `source`, are never overwritten.
 *
 * @param {object[]} dogs - The client's dog list, updated in place.
 * @param {object} incoming - Dog details from one source record.
 * @returns {void}
 */
function mergeDog(dogs, incoming) {
  const nameKey = (incoming.name ?? '').toLowerCase();
  const known = dogs.find((d) => (d.name ?? '').toLowerCase() === nameKey);
  if (!known) {
    // Store a copy so later merges never mutate the caller's object.
    dogs.push({ ...incoming });
    return;
  }
  for (const [field, value] of Object.entries(incoming)) {
    if (value != null && known[field] == null) known[field] = value;
  }
}

/**
 * Merges `partial` into the client stored under the lowercased, trimmed
 * email, creating the client on first sight. Calls with a blank email are
 * ignored.
 *
 * Merge rules:
 *  - scalar fields: first non-null value wins; later values never overwrite;
 *  - `dogs`: merged by name via mergeDog;
 *  - `onboardingEntryIds` / `contractEntryIds`: appended without duplicates.
 *
 * @param {*} email - Owner email; used (normalised) as the client key.
 * @param {object} partial - Fields to merge; null/undefined values are skipped.
 * @returns {void}
 */
function upsertClient(email, partial) {
  const key = String(email ?? '').toLowerCase().trim();
  if (!key) return;

  const client = clientsByEmail.get(key) ?? {
    email: key,
    firstName: null,
    lastName: null,
    phone: null,
    phoneRaw: null,
    address: null,
    addressParts: null,
    dogs: [],
    onboardingEntryIds: [],
    contractEntryIds: [],
  };

  for (const [field, value] of Object.entries(partial)) {
    if (value == null) continue;
    if (field === 'dogs') {
      for (const dog of value) mergeDog(client.dogs, dog);
    } else if (field === 'onboardingEntryIds' || field === 'contractEntryIds') {
      for (const id of value) {
        if (!client[field].includes(id)) client[field].push(id);
      }
    } else if (client[field] == null) {
      client[field] = value;
    }
  }

  clientsByEmail.set(key, client);
}

// Contracts are merged first, so for scalar fields a contract value takes
// precedence over an onboarding value for the same email.
for (const c of contracts) {
  const { owner, dog, legacy } = c;
  if (!owner.email) continue;
  upsertClient(owner.email, {
    firstName: owner.firstName,
    lastName: owner.lastName,
    phone: owner.phone,
    phoneRaw: owner.phoneRaw,
    address: owner.address,
    addressParts: owner.addressParts,
    dogs: dog.fullName ? [{ name: dog.fullName, source: 'contract' }] : [],
    contractEntryIds: legacy.entryId ? [legacy.entryId] : [],
  });
}

// Onboarding records (already enriched above) fill in whatever is still missing.
for (const rec of onboardingPayload.records) {
  const { owner, dog, legacy } = rec;
  if (!owner.email) continue;
  const dogName = [dog.name, dog.surname].filter(Boolean).join(' ');
  const dogs = dogName
    ? [{
      name: dogName,
      dateOfBirth: dog.dateOfBirth,
      breed: dog.breed,
      source: 'onboarding',
    }]
    : [];
  upsertClient(owner.email, {
    firstName: owner.firstName,
    lastName: owner.lastName,
    phone: owner.phone,
    phoneRaw: owner.phoneRaw,
    address: owner.address,
    addressParts: owner.addressParts,
    dogs,
    onboardingEntryIds: legacy.entryId ? [legacy.entryId] : [],
  });
}

// Merged clients view, sorted by email so the output order is stable between runs.
const sortedClients = [...clientsByEmail.values()].sort((a, b) => a.email.localeCompare(b.email));
writeFileSync(
  CLIENTS_OUT,
  JSON.stringify({
    exportedAt: new Date().toISOString(),
    note: 'Owner-email-keyed merged view. Maps 1:1 to a Postgres `clients` table; the `dogs` array maps to a `dogs` table with a clients_id FK.',
    clients: sortedClients,
  }, null, 2) + '\n',
  'utf8',
);

// -- Summary -------------------------------------------------------------------
// Print run statistics as pretty-printed JSON on stdout.
const summary = {
  contracts: contracts.length,
  contractsWithEmail: contracts.filter((c) => c.owner.email).length,
  onboardingRecords: onboardingPayload.records.length,
  matched,
  unmatched: unmatched.length,
  backfilledEmail,
  backfilledAddress,
  backfilledPhone,
  uniqueClientsByEmail: clientsByEmail.size,
  // Only the first ten unmatched records, to keep the console output short.
  unmatchedSample: unmatched.slice(0, 10),
  outputs: { CONTRACTS_OUT, ONBOARDING_JSON, CLIENTS_OUT },
};
console.log(JSON.stringify(summary, null, 2));