1 · Audit Counters
Seven required structural metrics from the audit view.
Original listings
—
Cleaned listings
—
Excluded · old
—
Excluded · mileage
—
Excluded · salvage
—
Excluded · price
—
Excluded · missing
—
2 · OLS Regression — SQL vs Python Reference
Pure-SQL Cramer's Rule implementation compared against numpy closed-form OLS on the same seeded data.
Clean cohort (accident_flag = false)
| Metric | SQL (Cramer) | Python (numpy) | |Δ| |
|---|---|---|---|
| intercept (β₀) | — | -2,947,669.6854 | — |
| year coefficient (β₁) | — | 1,470.62949313 | — |
| mileage coefficient (β₂) | — | -0.0621497135 | — |
| r_squared | — | 0.9889860361 | — |
| sample_count | — | 18 | — |
Accident cohort (accident_flag = true)
| Metric | SQL (Cramer) | Python (numpy) | |Δ| |
|---|---|---|---|
| intercept (β₀) | — | -3,000,175.0619 | — |
| year coefficient (β₁) | — | 1,494.88407012 | — |
| mileage coefficient (β₂) | — | -0.0499646763 | — |
| r_squared | — | 0.9730032371 | — |
| sample_count | — | 12 | — |
3 · Diminished-Value RPC Calculator
Live execution of get_vehicle_diminished_value() with the full 10%–25% validation/remediation loop.
Run the RPC to see all 13 return fields.
4 · Raw Listings vs Excluded Rows
Each row labeled with the precise reason for exclusion from listing_audit.
Included (0)
| VIN | Year | Price | Mileage | Acc |
|---|
Excluded (0)
| VIN | Year | Price | Mileage | Acc | Reason |
|---|
5 · SQL Migration Script
The exact migration applied to this Lovable Cloud backend. Copy or download to re-create the schema anywhere.
-- =====================================================================
-- National Accident Stigma Database (NASD) — POC backend
-- Tables, views, indexes, OLS regression (Cramer's Rule), and RPC.
-- =====================================================================
-- 1. BASE TABLE -------------------------------------------------------
CREATE TABLE IF NOT EXISTS public.listings (
id BIGSERIAL PRIMARY KEY,
vin TEXT,
year INT,
make TEXT,
model TEXT,
trim TEXT,
price NUMERIC,
mileage NUMERIC,
zipcode TEXT,
state TEXT,
source TEXT,
accident_flag BOOLEAN NOT NULL DEFAULT false,
listing_date DATE,
created_at TIMESTAMPTZ NOT NULL DEFAULT now()
);
GRANT SELECT, INSERT, UPDATE, DELETE ON public.listings TO authenticated;
GRANT SELECT ON public.listings TO anon;
GRANT ALL ON public.listings TO service_role;
GRANT USAGE, SELECT ON SEQUENCE public.listings_id_seq TO anon, authenticated, service_role;
ALTER TABLE public.listings ENABLE ROW LEVEL SECURITY;
DROP POLICY IF EXISTS "Public read listings (POC)" ON public.listings;
CREATE POLICY "Public read listings (POC)" ON public.listings FOR SELECT USING (true);
CREATE INDEX IF NOT EXISTS idx_listings_make_model ON public.listings (lower(trim(make)), lower(trim(model)));
CREATE INDEX IF NOT EXISTS idx_listings_listing_date ON public.listings (listing_date);
CREATE INDEX IF NOT EXISTS idx_listings_accident_flag ON public.listings (accident_flag);
-- 2. AUDIT VIEW (each row labeled Included / Excluded + reason) -------
CREATE OR REPLACE VIEW public.listing_audit
WITH (security_invoker = true) AS
WITH base AS (
SELECT l.*,
CASE WHEN l.vin IS NULL OR l.year IS NULL OR l.make IS NULL
OR l.model IS NULL OR l.price IS NULL
OR l.mileage IS NULL OR l.listing_date IS NULL
THEN 'missing_required_fields' END AS r_missing,
CASE WHEN l.listing_date IS NOT NULL
AND l.listing_date < CURRENT_DATE - INTERVAL '365 days'
THEN 'old_listing' END AS r_old,
CASE WHEN l.mileage IS NOT NULL AND l.mileage > 250000
THEN 'mileage_outlier' END AS r_mileage,
CASE WHEN COALESCE(l.source,'') ~* '(salvage|rebuilt|flood|junk|lemon)'
OR COALESCE(l.trim,'') ~* '(salvage|rebuilt|flood|junk|lemon)'
THEN 'salvage_title' END AS r_salvage
FROM public.listings l
),
with_median AS (
SELECT lower(trim(b.make)) AS mk, lower(trim(b.model)) AS mdl,
percentile_cont(0.5) WITHIN GROUP (ORDER BY b.price) AS cohort_median_price
FROM base b
WHERE b.r_missing IS NULL
GROUP BY lower(trim(b.make)), lower(trim(b.model))
),
flagged AS (
SELECT b.id, b.vin, b.year, b.make, b.model, b.trim, b.price, b.mileage,
b.zipcode, b.state, b.source, b.accident_flag, b.listing_date,
m.cohort_median_price,
b.r_missing, b.r_old, b.r_mileage, b.r_salvage,
CASE WHEN m.cohort_median_price IS NOT NULL
AND (b.price < 0.25 * m.cohort_median_price
OR b.price > 3.00 * m.cohort_median_price)
THEN 'price_outlier' END AS r_price
FROM base b
LEFT JOIN with_median m
ON m.mk = lower(trim(b.make)) AND m.mdl = lower(trim(b.model))
)
SELECT id, vin, year, make, model, trim, price, mileage, zipcode, state,
source, accident_flag, listing_date, cohort_median_price,
COALESCE(r_missing, r_salvage, r_old, r_mileage, r_price) AS reason_for_exclusion,
(COALESCE(r_missing, r_salvage, r_old, r_mileage, r_price) IS NULL) AS included
FROM flagged;
GRANT SELECT ON public.listing_audit TO anon, authenticated, service_role;
-- 3. CLEANED LISTINGS VIEW -------------------------------------------
CREATE OR REPLACE VIEW public.cleaned_listings
WITH (security_invoker = true) AS
SELECT id, vin, year, make, model, trim, price, mileage,
zipcode, state, source, accident_flag, listing_date
FROM public.listing_audit
WHERE included = true;
GRANT SELECT ON public.cleaned_listings TO anon, authenticated, service_role;
-- 4. AUDIT COUNTS VIEW (7 required counters) -------------------------
CREATE OR REPLACE VIEW public.audit_counts
WITH (security_invoker = true) AS
SELECT
(SELECT count(*) FROM public.listings)::INT AS original_listing_count,
(SELECT count(*) FROM public.listing_audit WHERE included)::INT AS cleaned_listing_count,
(SELECT count(*) FROM public.listing_audit WHERE reason_for_exclusion='old_listing')::INT AS excluded_old_listing_count,
(SELECT count(*) FROM public.listing_audit WHERE reason_for_exclusion='mileage_outlier')::INT AS excluded_mileage_outlier_count,
(SELECT count(*) FROM public.listing_audit WHERE reason_for_exclusion='salvage_title')::INT AS excluded_salvage_title_count,
(SELECT count(*) FROM public.listing_audit WHERE reason_for_exclusion='price_outlier')::INT AS excluded_price_outlier_count,
(SELECT count(*) FROM public.listing_audit WHERE reason_for_exclusion='missing_required_fields')::INT AS excluded_missing_required_fields_count;
GRANT SELECT ON public.audit_counts TO anon, authenticated, service_role;
-- 5. OLS REGRESSION via CRAMER'S RULE (pure SQL, 3x3) -----------------
-- Solves Beta = (X^T X)^{-1} X^T Y for the model
-- price = b0 + b1*year + b2*mileage
-- using determinants computed by Sarrus expansion.
CREATE OR REPLACE FUNCTION public.calc_ols_camry(p_accident BOOLEAN)
RETURNS TABLE(
intercept NUMERIC,
year_coefficient NUMERIC,
mileage_coefficient NUMERIC,
sample_count INT,
r_squared NUMERIC
)
LANGUAGE plpgsql
STABLE
SECURITY INVOKER
SET search_path = public
AS $fn$
DECLARE
n NUMERIC; sy NUMERIC; sm NUMERIC;
syy NUMERIC; smm NUMERIC; sym NUMERIC;
sp NUMERIC; syp NUMERIC; smp NUMERIC;
detA NUMERIC; det0 NUMERIC; det1 NUMERIC; det2 NUMERIC;
b0 NUMERIC; b1 NUMERIC; b2 NUMERIC;
mean_p NUMERIC; tss NUMERIC; rss NUMERIC;
BEGIN
SELECT count(*)::NUMERIC,
sum(year::NUMERIC),
sum(mileage),
sum(year::NUMERIC * year::NUMERIC),
sum(mileage * mileage),
sum(year::NUMERIC * mileage),
sum(price),
sum(year::NUMERIC * price),
sum(mileage * price),
avg(price)
INTO n, sy, sm, syy, smm, sym, sp, syp, smp, mean_p
FROM public.cleaned_listings
WHERE lower(make) = 'toyota'
AND lower(model) = 'camry'
AND accident_flag = p_accident;
IF n IS NULL OR n < 3 THEN
intercept := NULL; year_coefficient := NULL; mileage_coefficient := NULL;
sample_count := COALESCE(n,0)::INT; r_squared := NULL;
RETURN NEXT; RETURN;
END IF;
-- 3x3 determinant of X^T X via cofactor expansion along the first row
detA := n * (syy*smm - sym*sym)
- sy * (sy *smm - sym*sm )
+ sm * (sy *sym - syy*sm );
-- Replace column 0 (intercept column) with X^T Y
det0 := sp * (syy*smm - sym*sym)
- sy * (syp*smm - sym*smp)
+ sm * (syp*sym - syy*smp);
-- Replace column 1 (year column) with X^T Y
det1 := n * (syp*smm - smp*sym)
- sp * (sy *smm - sym*sm )
+ sm * (sy *smp - syp*sm );
-- Replace column 2 (mileage column) with X^T Y
det2 := n * (syy*smp - syp*sym)
- sy * (sy *smp - syp*sm )
+ sp * (sy *sym - syy*sm );
IF detA = 0 THEN
intercept := NULL; year_coefficient := NULL; mileage_coefficient := NULL;
sample_count := n::INT; r_squared := NULL;
RETURN NEXT; RETURN;
END IF;
b0 := det0 / detA;
b1 := det1 / detA;
b2 := det2 / detA;
SELECT sum((price - mean_p)^2),
sum((price - (b0 + b1*year::NUMERIC + b2*mileage))^2)
INTO tss, rss
FROM public.cleaned_listings
WHERE lower(make) = 'toyota'
AND lower(model) = 'camry'
AND accident_flag = p_accident;
intercept := b0;
year_coefficient := b1;
mileage_coefficient := b2;
sample_count := n::INT;
r_squared := CASE WHEN tss IS NULL OR tss = 0 THEN NULL ELSE 1 - (rss / tss) END;
RETURN NEXT;
END;
$fn$;
GRANT EXECUTE ON FUNCTION public.calc_ols_camry(BOOLEAN) TO anon, authenticated, service_role;
-- 6. SMART RPC WITH REMEDIATION LOOP ---------------------------------
CREATE OR REPLACE FUNCTION public.get_vehicle_diminished_value(
p_year INT,
p_make TEXT,
p_model TEXT,
p_mileage NUMERIC
)
RETURNS TABLE(
predicted_clean_value NUMERIC,
predicted_accident_value NUMERIC,
diminished_value NUMERIC,
diminished_value_percent NUMERIC,
clean_sample_count INT,
accident_sample_count INT,
r_squared_clean NUMERIC,
r_squared_accident NUMERIC,
confidence_score NUMERIC,
validation_status TEXT,
warning_message TEXT,
remedy_applied TEXT,
requires_manual_review BOOLEAN
)
LANGUAGE plpgsql
STABLE
SECURITY INVOKER
SET search_path = public
AS $rpc$
DECLARE
c RECORD; a RECORD;
pred_c NUMERIC; pred_a NUMERIC;
dv NUMERIC; dvp NUMERIC;
v_status TEXT := 'PASS';
v_warn TEXT := NULL;
v_remedy TEXT := 'none';
v_manual BOOLEAN := false;
v_conf NUMERIC;
BEGIN
-- POC scope: Toyota Camry cohort
SELECT * INTO c FROM public.calc_ols_camry(false);
SELECT * INTO a FROM public.calc_ols_camry(true);
IF c.intercept IS NULL OR a.intercept IS NULL THEN
v_status := 'INSUFFICIENT_DATA';
v_warn := 'Not enough cleaned rows in one or both cohorts (need >= 3).';
v_manual := true;
v_remedy := 'manual_review_required';
ELSE
pred_c := c.intercept + c.year_coefficient * p_year + c.mileage_coefficient * p_mileage;
pred_a := a.intercept + a.year_coefficient * p_year + a.mileage_coefficient * p_mileage;
dv := pred_c - pred_a;
dvp := CASE WHEN pred_c > 0 THEN dv / pred_c ELSE NULL END;
-- 10%-25% validation loop
IF dvp IS NULL OR dvp < 0.10 OR dvp > 0.25 THEN
v_status := 'OUT_OF_RANGE';
v_remedy := 'flagged_for_remediation';
v_warn := 'Initial DV% outside 10-25% band; remediation engaged.';
-- Step A: enhanced bounds (clamp to band)
IF dvp IS NOT NULL THEN
IF dvp < 0.10 THEN
dvp := 0.10; v_remedy := 'enhanced_bounds_lower_clamp';
ELSIF dvp > 0.25 THEN
dvp := 0.25; v_remedy := 'enhanced_bounds_upper_clamp';
END IF;
dv := pred_c * dvp;
v_status := 'REMEDIATED';
END IF;
-- Step B: low-sample fallback
IF c.sample_count < 10 OR a.sample_count < 5 THEN
v_remedy := v_remedy || '+low_sample_fallback';
v_manual := true;
v_warn := v_warn || ' Low sample counts triggered manual review.';
END IF;
END IF;
END IF;
v_conf := LEAST(COALESCE(c.r_squared, 0), COALESCE(a.r_squared, 0));
IF c.sample_count < 10 OR a.sample_count < 5 THEN
v_conf := v_conf * 0.5;
END IF;
predicted_clean_value := pred_c;
predicted_accident_value := pred_a;
diminished_value := dv;
diminished_value_percent := dvp;
clean_sample_count := c.sample_count;
accident_sample_count := a.sample_count;
r_squared_clean := c.r_squared;
r_squared_accident := a.r_squared;
confidence_score := v_conf;
validation_status := v_status;
warning_message := v_warn;
remedy_applied := v_remedy;
requires_manual_review := v_manual;
RETURN NEXT;
END;
$rpc$;
GRANT EXECUTE ON FUNCTION public.get_vehicle_diminished_value(INT, TEXT, TEXT, NUMERIC)
TO anon, authenticated, service_role;
Loading verification data…