v4.0.0.2
This commit is contained in:
@@ -0,0 +1,360 @@
|
||||
#!/usr/bin/env node
|
||||
// Cleans the legacy Gravity Forms contract CSV into structured JSON, then
|
||||
// enriches data/legacy-onboarding.json with the owner email + postal address
|
||||
// (and missing phone) wherever an onboarding row matches a contract row.
|
||||
//
|
||||
// Input:
|
||||
// goodwalk-contract-2026-05-20.csv (repo root)
|
||||
// data/legacy-onboarding.json (produced by clean-legacy-onboarding.mjs)
|
||||
//
|
||||
// Output:
|
||||
// data/legacy-contracts.json (cleaned contracts)
|
||||
// data/legacy-onboarding.json (enriched in place)
|
||||
// data/legacy-clients.json (owner-email-keyed merged view)
|
||||
//
|
||||
// Run: node scripts/clean-legacy-contracts.mjs
|
||||
|
||||
import { readFileSync, writeFileSync, mkdirSync, existsSync } from 'node:fs';
|
||||
import { dirname, resolve } from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
|
||||
// Directory containing this script, and the directory one level above it,
// against which every input/output path below is resolved.
const __dirname = dirname(fileURLToPath(import.meta.url));
const ROOT = resolve(__dirname, '..');

// Input CSV, onboarding JSON (read and rewritten), and the two generated files.
const [CONTRACT_CSV, ONBOARDING_JSON, CONTRACTS_OUT, CLIENTS_OUT] = [
  'goodwalk-contract-2026-05-20.csv',
  'data/legacy-onboarding.json',
  'data/legacy-contracts.json',
  'data/legacy-clients.json',
].map((relativePath) => resolve(ROOT, relativePath));
|
||||
|
||||
/**
 * Minimal CSV parser: comma-separated fields, double-quoted fields may contain
 * commas, line breaks and `""`-escaped quotes.
 *
 * Record terminators are LF, CRLF, or a bare CR (a CR that is not followed by
 * LF). Line breaks inside a quoted field are kept verbatim.
 *
 * @param {string} text - Raw CSV text.
 * @returns {string[][]} One array of field strings per record, in input order.
 *   A trailing newline does not produce an extra empty record.
 */
function parseCsv(text) {
  const rows = [];
  let record = [];
  let field = '';
  let inQuotes = false;
  // True once the current field has opened a quote, so that a final record
  // consisting only of `""` is still emitted as one empty field.
  let fieldQuoted = false;

  const endField = () => {
    record.push(field);
    field = '';
    fieldQuoted = false;
  };
  const endRecord = () => {
    endField();
    rows.push(record);
    record = [];
  };

  for (let i = 0; i < text.length; i++) {
    const ch = text[i];

    if (inQuotes) {
      if (ch !== '"') {
        field += ch;
      } else if (text[i + 1] === '"') {
        // Doubled quote inside a quoted field is a literal quote.
        field += '"';
        i++;
      } else {
        inQuotes = false;
      }
      continue;
    }

    switch (ch) {
      case '"':
        inQuotes = true;
        fieldQuoted = true;
        break;
      case ',':
        endField();
        break;
      case '\n':
        endRecord();
        break;
      case '\r':
        // CRLF: let the following LF end the record. A bare CR ends it here,
        // otherwise CR-only files would collapse into a single record.
        if (text[i + 1] !== '\n') endRecord();
        break;
      default:
        field += ch;
    }
  }

  // Flush the last record when the input does not end with a line break.
  if (field.length || record.length || fieldQuoted) endRecord();
  return rows;
}
|
||||
|
||||
// Trim a cell value (null/undefined count as empty); yields null when nothing
// but whitespace remains, otherwise the trimmed string.
const trimOrNull = (v) => (v ?? '').trim() || null;
|
||||
|
||||
// Normalise a value into a comparison key: trimmed, lower-cased, and with each
// run of whitespace collapsed to a single space. Nullish input gives ''.
const lowerKey = (v) => {
  const text = v ?? '';
  const lowered = text.trim().toLowerCase();
  return lowered.replace(/\s+/g, ' ');
};
|
||||
|
||||
/**
 * Normalise a free-text phone number to E.164, treating numbers written in
 * national format (leading 0) as New Zealand (+64) numbers.
 *
 * Rules, in order:
 *  - a leading `+` means the digits are already international;
 *  - a leading `00` is an international dialling prefix and is dropped;
 *  - digits starting with `64` are taken as already carrying the country code;
 *  - a single leading `0` is a national trunk prefix and is replaced by `64`;
 *  - anything else cannot be normalised.
 * A trunk zero left after the 64 country code (e.g. "+64 (0)21 ...") is removed.
 *
 * @param {string|null|undefined} raw - Phone number as entered.
 * @returns {{ raw: string|null, e164: string|null }} `raw` is the trimmed
 *   input (null when empty); `e164` is the normalised number or null when the
 *   input contains no digits or cannot be interpreted.
 */
function normalizePhone(raw) {
  const original = (raw ?? '').trim();
  if (!original) return { raw: null, e164: null };

  // A '+' counts only when it precedes every digit.
  const hasPlus = original.replace(/[^\d+]/g, '').startsWith('+');
  const digits = original.replace(/\D/g, '');
  // Covers inputs such as "+" or "n/a" that carry no digits at all.
  if (!digits) return { raw: original, e164: null };

  let international;
  if (hasPlus) {
    international = digits;
  } else if (digits.startsWith('00')) {
    international = digits.slice(2);
  } else if (digits.startsWith('64')) {
    international = digits;
  } else if (digits.startsWith('0')) {
    international = `64${digits.slice(1)}`;
  } else {
    return { raw: original, e164: null };
  }

  // "+64 (0)21 ..." style: drop the national trunk zero after the country code.
  if (international.startsWith('640')) {
    international = `64${international.slice(3)}`;
  }

  return { raw: original, e164: international ? `+${international}` : null };
}
|
||||
|
||||
/**
 * Join the address fragments into one comma-separated line, in the order
 * street, line 2, suburb, city, postal code, country.
 *
 * Blank fragments are skipped, and a fragment that equals the previously kept
 * one (case-insensitively) is dropped, since suburb sometimes repeats city.
 *
 * @returns {string|null} The composed address, or null when every part is blank.
 */
function composeAddress({ street, line2, city, suburb, postal, country }) {
  const kept = [];
  for (const candidate of [street, line2, suburb, city, postal, country]) {
    const fragment = (candidate ?? '').trim();
    if (fragment === '') continue;

    const previous = kept[kept.length - 1];
    const repeatsPrevious =
      previous !== undefined && previous.toLowerCase() === fragment.toLowerCase();
    if (!repeatsPrevious) kept.push(fragment);
  }
  return kept.length > 0 ? kept.join(', ') : null;
}
|
||||
|
||||
// -- Parse contract CSV --------------------------------------------------------
|
||||
// Fail with the same style of message used for the onboarding file rather
// than an uncaught ENOENT stack trace.
if (!existsSync(CONTRACT_CSV)) {
  console.error(`Contract CSV not found at ${CONTRACT_CSV}.`);
  process.exit(1);
}

// Strip a leading byte-order mark. The escape is spelled out so the pattern
// does not depend on an invisible literal character surviving in the source.
const raw = readFileSync(CONTRACT_CSV, 'utf8').replace(/^\uFEFF/, '');
const rows = parseCsv(raw);
if (rows.length === 0) {
  console.error(`Contract CSV at ${CONTRACT_CSV} is empty (no header row).`);
  process.exit(1);
}

// First record is the header row; remaining records are data, minus any row
// whose cells are all blank.
const headers = rows[0].map((h) => h.trim());
const data = rows.slice(1).filter((r) => r.some((c) => (c ?? '').trim() !== ''));

// Header name -> column index. `col` returns '' when the header is unknown or
// the row is shorter than the header row.
const idx = Object.fromEntries(headers.map((h, i) => [h, i]));
const col = (row, name) => row[idx[name]] ?? '';
|
||||
|
||||
// One structured contract record per CSV data row. Key order is kept stable
// because these objects are serialised straight to JSON.
const contracts = data.map((row) => {
  // Raw cell text for a header, and the same value trimmed (null when blank).
  const cell = (name) => col(row, name);
  const text = (name) => trimOrNull(cell(name));

  const first = text('Owners Name (First Name)');
  const middle = text('Owners Name (Middle)');
  const last = text('Owners Name (Last Name/Surname)');
  const nameParts = [first, middle, last].filter(Boolean);
  const fullName = nameParts.join(' ') || null;

  const { e164: phoneE164, raw: phoneRaw } = normalizePhone(cell('Phone'));

  // Form-entry metadata.
  const legacy = {
    entryId: text('Entry Id'),
    entryDate: text('Entry Date'),
    dateUpdated: text('Date Updated'),
    createdByUserId: text('Created By (User Id)'),
    sourceUrl: text('Source Url'),
    userAgent: text('User Agent'),
    userIp: text('User IP'),
    pdfUrl: text('PDF: PDF Label'),
    signatureUrl: text('Owner Signature'),
  };

  // The composed one-line address takes the raw cells (composeAddress trims
  // them itself); addressParts keeps each component trimmed-or-null.
  const address = composeAddress({
    street: cell('Residential Address (Street Address)'),
    line2: cell('Residential Address (Address Line 2)'),
    city: cell('Residential Address (City)'),
    suburb: cell('Residential Address (Suburb)'),
    postal: cell('Residential Address (ZIP / Postal Code)'),
    country: cell('Residential Address (Country)'),
  });
  const addressParts = {
    street: text('Residential Address (Street Address)'),
    line2: text('Residential Address (Address Line 2)'),
    suburb: text('Residential Address (Suburb)'),
    city: text('Residential Address (City)'),
    postalCode: text('Residential Address (ZIP / Postal Code)'),
    country: text('Residential Address (Country)'),
  };

  const owner = {
    firstName: first,
    middleName: middle,
    lastName: last,
    fullName,
    email: text("Owner's email (Enter Email)"),
    phone: phoneE164,
    phoneRaw,
    address,
    addressParts,
  };

  const dog = {
    fullName: text("Dog's name (include surname)"),
  };

  const consent = {
    // True only when the cell reads "checked", ignoring case and padding.
    checked: cell('Consent (Consent)').trim().toLowerCase() === 'checked',
    text: text('Consent (Text)'),
    signedOn: text('Date contract signed'),
  };

  return { legacy, owner, dog, consent };
});
|
||||
|
||||
// -- Build contract lookup -----------------------------------------------------
|
||||
// Some owners appear twice (re-signing) — keep the highest entryId per key.
|
||||
/**
 * Store `contract` under `key` unless the map already holds a contract whose
 * numeric entry id is greater than or equal to the new one.
 *
 * Entry ids are compared as numbers; a missing or non-numeric id counts as 0.
 * On a tie the contract already in the map is kept.
 */
function keepNewer(map, key, contract) {
  const entryNumber = (candidate) => Number(candidate.legacy.entryId) || 0;

  const current = map.get(key);
  if (!current || entryNumber(contract) > entryNumber(current)) {
    map.set(key, contract);
  }
}
|
||||
|
||||
// Lookup tables from a normalised key to the contract with the highest entry
// id for that key (see keepNewer).
const byNameKey = new Map(); // "last|first"
const byLastKey = new Map(); // "last"
const byDogKey = new Map(); // dog full name, normalised
const byDogFirstWord = new Map(); // "first token of dog name|owner last" (handles surname mismatches)

contracts.forEach((contract) => {
  const { owner, dog } = contract;
  const surname = lowerKey(owner.lastName);
  const givenName = lowerKey(owner.firstName);

  if (surname) {
    keepNewer(byLastKey, surname, contract);
    if (givenName) keepNewer(byNameKey, `${surname}|${givenName}`, contract);
  }

  if (!dog.fullName) return;

  const dogKey = lowerKey(dog.fullName);
  keepNewer(byDogKey, dogKey, contract);

  // The surname part of this key may be empty when the owner has no last name.
  const [dogFirstToken] = dogKey.split(/\s+/);
  if (dogFirstToken) keepNewer(byDogFirstWord, `${dogFirstToken}|${surname}`, contract);
});
|
||||
|
||||
// -- Enrich onboarding ---------------------------------------------------------
|
||||
/**
 * Read and validate the onboarding JSON that this script enriches.
 *
 * Exits the process with status 1 (after printing a message) when the file is
 * missing, cannot be read or parsed, or has no `records` array, so the
 * problem is reported here rather than as a TypeError inside the loop below.
 *
 * @returns {{ records: object[] }} The parsed payload.
 */
function loadOnboardingPayload() {
  if (!existsSync(ONBOARDING_JSON)) {
    console.error(`Onboarding JSON not found at ${ONBOARDING_JSON}. Run clean-legacy-onboarding.mjs first.`);
    process.exit(1);
  }

  let payload;
  try {
    payload = JSON.parse(readFileSync(ONBOARDING_JSON, 'utf8'));
  } catch (err) {
    console.error(`Could not read or parse ${ONBOARDING_JSON}: ${err.message}`);
    process.exit(1);
  }

  if (!Array.isArray(payload?.records)) {
    console.error(`${ONBOARDING_JSON} has no "records" array. Re-run clean-legacy-onboarding.mjs.`);
    process.exit(1);
  }
  return payload;
}

const onboardingPayload = loadOnboardingPayload();

// Counters reported in the final summary.
let matched = 0;
let backfilledEmail = 0;
let backfilledAddress = 0;
let backfilledPhone = 0;
// Onboarding records for which no contract could be found.
const unmatched = [];
|
||||
|
||||
// Number of contracts per normalised owner surname, computed once so the
// surname-only fallback does not rescan every contract for every record.
const contractCountBySurname = new Map();
for (const contract of contracts) {
  const surname = lowerKey(contract.owner.lastName);
  if (surname) {
    contractCountBySurname.set(surname, (contractCountBySurname.get(surname) ?? 0) + 1);
  }
}

/**
 * Find the contract that belongs to an onboarding record.
 *
 * Strategies are tried in order and the first hit wins:
 *  1. owner last + first name;
 *  2. dog full name (name + surname);
 *  3. dog first name + owner last name;
 *  4. owner last name alone, accepted only when exactly one contract carries
 *     that surname.
 *
 * @param {object} rec - Onboarding record (reads rec.owner and rec.dog).
 * @returns {{ match: object, matchedBy: string } | null} The contract and the
 *   strategy that found it, or null when nothing matched.
 */
function findContractMatch(rec) {
  const last = lowerKey(rec.owner.lastName);
  const first = lowerKey(rec.owner.firstName);
  const dogFull = lowerKey([rec.dog.name, rec.dog.surname].filter(Boolean).join(' '));
  const dogFirst = lowerKey(rec.dog.name);

  if (last && first && byNameKey.has(`${last}|${first}`)) {
    return { match: byNameKey.get(`${last}|${first}`), matchedBy: 'owner_name' };
  }
  if (dogFull && byDogKey.has(dogFull)) {
    return { match: byDogKey.get(dogFull), matchedBy: 'dog_full_name' };
  }
  if (dogFirst && last && byDogFirstWord.has(`${dogFirst}|${last}`)) {
    return { match: byDogFirstWord.get(`${dogFirst}|${last}`), matchedBy: 'dog_first_owner_last' };
  }
  // Last resort: lone surname match. With exactly one contract for the
  // surname, byLastKey necessarily holds that contract.
  if (last && contractCountBySurname.get(last) === 1) {
    return { match: byLastKey.get(last), matchedBy: 'owner_last_only' };
  }
  return null;
}

for (const rec of onboardingPayload.records) {
  const found = findContractMatch(rec);

  if (!found) {
    unmatched.push({
      onboardingEntryId: rec.legacy.entryId,
      owner: rec.owner.fullName,
      dog: [rec.dog.name, rec.dog.surname].filter(Boolean).join(' '),
    });
    continue;
  }

  const { match, matchedBy } = found;
  matched++;

  // Backfill only what the onboarding record is missing; existing values win.
  if (!rec.owner.email && match.owner.email) {
    rec.owner.email = match.owner.email;
    backfilledEmail++;
  }
  if (!rec.owner.address && match.owner.address) {
    rec.owner.address = match.owner.address;
    rec.owner.addressParts = match.owner.addressParts;
    backfilledAddress++;
  }
  if (!rec.owner.phone && match.owner.phone) {
    rec.owner.phone = match.owner.phone;
    rec.owner.phoneRaw = match.owner.phoneRaw;
    backfilledPhone++;
  }

  // Record which contract was used and how it was found.
  rec.legacy.contractMatch = {
    entryId: match.legacy.entryId,
    matchedBy,
    signedOn: match.consent.signedOn,
    contractPdfUrl: match.legacy.pdfUrl,
  };
}
|
||||
|
||||
// -- Write outputs -------------------------------------------------------------
|
||||
// Make sure the output directory exists before the first write.
mkdirSync(dirname(CONTRACTS_OUT), { recursive: true });

// Cleaned contracts.
writeFileSync(
  CONTRACTS_OUT,
  JSON.stringify({
    exportedAt: new Date().toISOString(),
    source: { file: 'goodwalk-contract-2026-05-20.csv', rows: data.length, columns: headers.length },
    records: contracts,
  }, null, 2) + '\n',
  'utf8',
);

// Onboarding file, enriched in place. Because the file is rewritten on every
// run, the provenance note is added only when it is not already present;
// otherwise each re-run would append another copy.
const ENRICHMENT_NOTE =
  'Enriched from goodwalk-contract-2026-05-20.csv: owner email + postal address backfilled where a contract row matched.';
const previousNotes = onboardingPayload.notes ?? [];

onboardingPayload.enrichedAt = new Date().toISOString();
onboardingPayload.notes = previousNotes.includes(ENRICHMENT_NOTE)
  ? [...previousNotes]
  : [...previousNotes, ENRICHMENT_NOTE];

writeFileSync(ONBOARDING_JSON, JSON.stringify(onboardingPayload, null, 2) + '\n', 'utf8');
|
||||
|
||||
// -- Build a clients view keyed by email ---------------------------------------
|
||||
// This is the shape that maps most naturally to a Postgres `clients` table.
|
||||
// Merged client records, keyed by lower-cased, trimmed owner email.
const clientsByEmail = new Map();

/**
 * Merge one dog into a client's dog list.
 *
 * Dogs are identified by name (case-insensitive, whitespace-normalised). A new
 * name is appended as a copy; for a known name, only fields that the stored
 * dog lacks are filled in, so the first-seen values (including `source`) are
 * kept while later, richer records still contribute e.g. breed or birth date.
 */
function mergeDog(dogs, incoming) {
  const nameKey = (dog) => (dog.name ?? '').trim().toLowerCase().replace(/\s+/g, ' ');
  const incomingKey = nameKey(incoming);
  const known = dogs.find((dog) => nameKey(dog) === incomingKey);

  if (!known) {
    // Copy so later merges never mutate the caller's object.
    dogs.push({ ...incoming });
    return;
  }
  for (const [field, value] of Object.entries(incoming)) {
    if (value != null && known[field] == null) known[field] = value;
  }
}

/**
 * Create or update the client stored under `email`.
 *
 * - Scalar fields: the first non-null value seen wins; later values never
 *   overwrite it.
 * - `dogs`: merged by name (see mergeDog).
 * - `onboardingEntryIds` / `contractEntryIds`: appended without duplicates.
 *
 * @param {string|null|undefined} email - Owner email; blank or nullish emails
 *   are ignored.
 * @param {object} partial - Fields to merge; null/undefined values are skipped.
 */
function upsertClient(email, partial) {
  const key = (email ?? '').toLowerCase().trim();
  if (!key) return;

  const existing = clientsByEmail.get(key) ?? {
    email: key,
    firstName: null,
    lastName: null,
    phone: null,
    phoneRaw: null,
    address: null,
    addressParts: null,
    dogs: [],
    onboardingEntryIds: [],
    contractEntryIds: [],
  };

  for (const [k, v] of Object.entries(partial)) {
    if (v == null) continue;

    if (k === 'dogs') {
      for (const dog of v) mergeDog(existing.dogs, dog);
    } else if (k === 'onboardingEntryIds' || k === 'contractEntryIds') {
      for (const id of v) {
        if (!existing[k].includes(id)) existing[k].push(id);
      }
    } else if (existing[k] == null) {
      existing[k] = v;
    }
  }

  clientsByEmail.set(key, existing);
}
|
||||
|
||||
// Contracts are fed in before onboarding records, so for scalar fields (where
// upsertClient keeps the first non-null value) contract data takes precedence.
contracts
  .filter(({ owner }) => owner.email)
  .forEach(({ owner, dog, legacy }) => {
    const { firstName, lastName, phone, phoneRaw, address, addressParts } = owner;
    const dogs = dog.fullName ? [{ name: dog.fullName, source: 'contract' }] : [];
    const contractEntryIds = legacy.entryId ? [legacy.entryId] : [];

    upsertClient(owner.email, {
      firstName,
      lastName,
      phone,
      phoneRaw,
      address,
      addressParts,
      dogs,
      contractEntryIds,
    });
  });

// Onboarding records (already enriched above) contribute dog details and
// their own entry ids.
onboardingPayload.records
  .filter(({ owner }) => owner.email)
  .forEach(({ owner, dog, legacy }) => {
    const { firstName, lastName, phone, phoneRaw, address, addressParts } = owner;
    const dogName = [dog.name, dog.surname].filter(Boolean).join(' ');
    const dogs = dogName
      ? [{
        name: dogName,
        dateOfBirth: dog.dateOfBirth,
        breed: dog.breed,
        source: 'onboarding',
      }]
      : [];
    const onboardingEntryIds = legacy.entryId ? [legacy.entryId] : [];

    upsertClient(owner.email, {
      firstName,
      lastName,
      phone,
      phoneRaw,
      address,
      addressParts,
      dogs,
      onboardingEntryIds,
    });
  });
|
||||
|
||||
// Emit the merged clients view, ordered by email so the file is stable
// between runs with the same input.
const clientsSortedByEmail = Array.from(clientsByEmail.values());
clientsSortedByEmail.sort((left, right) => left.email.localeCompare(right.email));

const clientsDocument = {
  exportedAt: new Date().toISOString(),
  note: 'Owner-email-keyed merged view. Maps 1:1 to a Postgres `clients` table; the `dogs` array maps to a `dogs` table with a clients_id FK.',
  clients: clientsSortedByEmail,
};

writeFileSync(CLIENTS_OUT, `${JSON.stringify(clientsDocument, null, 2)}\n`, 'utf8');
|
||||
|
||||
// -- Summary -------------------------------------------------------------------
|
||||
// Print a machine-readable run report: record counts, match and backfill
// statistics, the first ten unmatched onboarding records, and the paths of
// the files that were written.
const contractsWithEmail = contracts.reduce(
  (count, contract) => (contract.owner.email ? count + 1 : count),
  0,
);

const summary = {
  contracts: contracts.length,
  contractsWithEmail,
  onboardingRecords: onboardingPayload.records.length,
  matched,
  unmatched: unmatched.length,
  backfilledEmail,
  backfilledAddress,
  backfilledPhone,
  uniqueClientsByEmail: clientsByEmail.size,
  unmatchedSample: unmatched.slice(0, 10),
  outputs: { CONTRACTS_OUT, ONBOARDING_JSON, CLIENTS_OUT },
};

console.log(JSON.stringify(summary, null, 2));
|
||||
Reference in New Issue
Block a user