National Accident Stigma Database · Paid POC

Technical Verification Screen

Backend-first proof of concept. Postgres schema, V1 cleaning rules, 3-variable OLS regression solved in pure SQL via Cramer's Rule, and the smart diminished-value RPC with a 10%–25% validation/remediation loop.

1 · Audit Counters

Seven required structural metrics from the audit view.

Original listings
Cleaned listings
Excluded · old
Excluded · mileage
Excluded · salvage
Excluded · price
Excluded · missing

2 · OLS Regression — SQL vs Python Reference

Pure-SQL Cramer's Rule implementation compared against numpy closed-form OLS on the same seeded data.

Clean cohort (accident_flag = false)
MetricSQL (Cramer)Python (numpy)|Δ|
intercept (β₀)-2,947,669.6854
year coefficient (β₁)1,470.62949313
mileage coefficient (β₂)-0.0621497135
r_squared0.9889860361
sample_count18
Accident cohort (accident_flag = true)
MetricSQL (Cramer)Python (numpy)|Δ|
intercept (β₀)-3,000,175.0619
year coefficient (β₁)1,494.88407012
mileage coefficient (β₂)-0.0499646763
r_squared0.9730032371
sample_count12

3 · Diminished-Value RPC Calculator

Live execution of get_vehicle_diminished_value() with the full 10%–25% validation/remediation loop.

Run the RPC to see all 13 return fields.

4 · Raw Listings vs Excluded Rows

Each row labeled with the precise reason for exclusion from listing_audit.

Included (0)
VINYearPriceMileageAcc
Excluded (0)
VINYearPriceMileageAccReason

5 · SQL Migration Script

The exact migration applied to this Lovable Cloud backend. Copy or download to re-create the schema anywhere.

-- =====================================================================
-- National Accident Stigma Database (NASD) — POC backend
-- Tables, views, indexes, OLS regression (Cramer's Rule), and RPC.
-- =====================================================================

-- 1. BASE TABLE -------------------------------------------------------
CREATE TABLE IF NOT EXISTS public.listings (
  id            BIGSERIAL PRIMARY KEY,
  vin           TEXT,
  year          INT,
  make          TEXT,
  model         TEXT,
  trim          TEXT,
  price         NUMERIC,
  mileage       NUMERIC,
  zipcode       TEXT,
  state         TEXT,
  source        TEXT,
  accident_flag BOOLEAN NOT NULL DEFAULT false,
  listing_date  DATE,
  created_at    TIMESTAMPTZ NOT NULL DEFAULT now()
);

GRANT SELECT, INSERT, UPDATE, DELETE ON public.listings TO authenticated;
GRANT SELECT ON public.listings TO anon;
GRANT ALL ON public.listings TO service_role;
GRANT USAGE, SELECT ON SEQUENCE public.listings_id_seq TO anon, authenticated, service_role;

ALTER TABLE public.listings ENABLE ROW LEVEL SECURITY;

DROP POLICY IF EXISTS "Public read listings (POC)" ON public.listings;
CREATE POLICY "Public read listings (POC)" ON public.listings FOR SELECT USING (true);

CREATE INDEX IF NOT EXISTS idx_listings_make_model ON public.listings (lower(trim(make)), lower(trim(model)));
CREATE INDEX IF NOT EXISTS idx_listings_listing_date ON public.listings (listing_date);
CREATE INDEX IF NOT EXISTS idx_listings_accident_flag ON public.listings (accident_flag);

-- 2. AUDIT VIEW (each row labeled Included / Excluded + reason) -------
CREATE OR REPLACE VIEW public.listing_audit
WITH (security_invoker = true) AS
WITH base AS (
  SELECT l.*,
    CASE WHEN l.vin IS NULL OR l.year IS NULL OR l.make IS NULL
              OR l.model IS NULL OR l.price IS NULL
              OR l.mileage IS NULL OR l.listing_date IS NULL
         THEN 'missing_required_fields' END AS r_missing,
    CASE WHEN l.listing_date IS NOT NULL
              AND l.listing_date < CURRENT_DATE - INTERVAL '365 days'
         THEN 'old_listing' END AS r_old,
    CASE WHEN l.mileage IS NOT NULL AND l.mileage > 250000
         THEN 'mileage_outlier' END AS r_mileage,
    CASE WHEN COALESCE(l.source,'') ~* '(salvage|rebuilt|flood|junk|lemon)'
           OR COALESCE(l.trim,'')   ~* '(salvage|rebuilt|flood|junk|lemon)'
         THEN 'salvage_title' END AS r_salvage
  FROM public.listings l
),
with_median AS (
  SELECT lower(trim(b.make)) AS mk, lower(trim(b.model)) AS mdl,
         percentile_cont(0.5) WITHIN GROUP (ORDER BY b.price) AS cohort_median_price
  FROM base b
  WHERE b.r_missing IS NULL
  GROUP BY lower(trim(b.make)), lower(trim(b.model))
),
flagged AS (
  SELECT b.id, b.vin, b.year, b.make, b.model, b.trim, b.price, b.mileage,
         b.zipcode, b.state, b.source, b.accident_flag, b.listing_date,
         m.cohort_median_price,
         b.r_missing, b.r_old, b.r_mileage, b.r_salvage,
         CASE WHEN m.cohort_median_price IS NOT NULL
                   AND (b.price < 0.25 * m.cohort_median_price
                     OR b.price > 3.00 * m.cohort_median_price)
              THEN 'price_outlier' END AS r_price
  FROM base b
  LEFT JOIN with_median m
    ON m.mk = lower(trim(b.make)) AND m.mdl = lower(trim(b.model))
)
SELECT id, vin, year, make, model, trim, price, mileage, zipcode, state,
       source, accident_flag, listing_date, cohort_median_price,
       COALESCE(r_missing, r_salvage, r_old, r_mileage, r_price) AS reason_for_exclusion,
       (COALESCE(r_missing, r_salvage, r_old, r_mileage, r_price) IS NULL) AS included
FROM flagged;

GRANT SELECT ON public.listing_audit TO anon, authenticated, service_role;

-- 3. CLEANED LISTINGS VIEW -------------------------------------------
CREATE OR REPLACE VIEW public.cleaned_listings
WITH (security_invoker = true) AS
SELECT id, vin, year, make, model, trim, price, mileage,
       zipcode, state, source, accident_flag, listing_date
FROM public.listing_audit
WHERE included = true;

GRANT SELECT ON public.cleaned_listings TO anon, authenticated, service_role;

-- 4. AUDIT COUNTS VIEW (7 required counters) -------------------------
CREATE OR REPLACE VIEW public.audit_counts
WITH (security_invoker = true) AS
SELECT
  (SELECT count(*) FROM public.listings)::INT                                          AS original_listing_count,
  (SELECT count(*) FROM public.listing_audit WHERE included)::INT                      AS cleaned_listing_count,
  (SELECT count(*) FROM public.listing_audit WHERE reason_for_exclusion='old_listing')::INT              AS excluded_old_listing_count,
  (SELECT count(*) FROM public.listing_audit WHERE reason_for_exclusion='mileage_outlier')::INT          AS excluded_mileage_outlier_count,
  (SELECT count(*) FROM public.listing_audit WHERE reason_for_exclusion='salvage_title')::INT            AS excluded_salvage_title_count,
  (SELECT count(*) FROM public.listing_audit WHERE reason_for_exclusion='price_outlier')::INT            AS excluded_price_outlier_count,
  (SELECT count(*) FROM public.listing_audit WHERE reason_for_exclusion='missing_required_fields')::INT  AS excluded_missing_required_fields_count;

GRANT SELECT ON public.audit_counts TO anon, authenticated, service_role;

-- 5. OLS REGRESSION via CRAMER'S RULE (pure SQL, 3x3) -----------------
-- Solves Beta = (X^T X)^{-1} X^T Y for the model
--   price = b0 + b1*year + b2*mileage
-- using determinants computed by Sarrus expansion.
CREATE OR REPLACE FUNCTION public.calc_ols_camry(p_accident BOOLEAN)
RETURNS TABLE(
  intercept           NUMERIC,
  year_coefficient    NUMERIC,
  mileage_coefficient NUMERIC,
  sample_count        INT,
  r_squared           NUMERIC
)
LANGUAGE plpgsql
STABLE
SECURITY INVOKER
SET search_path = public
AS $fn$
DECLARE
  n NUMERIC; sy NUMERIC; sm NUMERIC;
  syy NUMERIC; smm NUMERIC; sym NUMERIC;
  sp NUMERIC; syp NUMERIC; smp NUMERIC;
  detA NUMERIC; det0 NUMERIC; det1 NUMERIC; det2 NUMERIC;
  b0 NUMERIC; b1 NUMERIC; b2 NUMERIC;
  mean_p NUMERIC; tss NUMERIC; rss NUMERIC;
BEGIN
  SELECT count(*)::NUMERIC,
         sum(year::NUMERIC),
         sum(mileage),
         sum(year::NUMERIC * year::NUMERIC),
         sum(mileage * mileage),
         sum(year::NUMERIC * mileage),
         sum(price),
         sum(year::NUMERIC * price),
         sum(mileage * price),
         avg(price)
    INTO n, sy, sm, syy, smm, sym, sp, syp, smp, mean_p
    FROM public.cleaned_listings
   WHERE lower(make) = 'toyota'
     AND lower(model) = 'camry'
     AND accident_flag = p_accident;

  IF n IS NULL OR n < 3 THEN
    intercept := NULL; year_coefficient := NULL; mileage_coefficient := NULL;
    sample_count := COALESCE(n,0)::INT; r_squared := NULL;
    RETURN NEXT; RETURN;
  END IF;

  -- 3x3 determinant of X^T X via cofactor expansion along the first row
  detA :=  n   * (syy*smm - sym*sym)
         - sy  * (sy *smm - sym*sm )
         + sm  * (sy *sym - syy*sm );

  -- Replace column 0 (intercept column) with X^T Y
  det0 :=  sp  * (syy*smm - sym*sym)
         - sy  * (syp*smm - sym*smp)
         + sm  * (syp*sym - syy*smp);

  -- Replace column 1 (year column) with X^T Y
  det1 :=  n   * (syp*smm - smp*sym)
         - sp  * (sy *smm - sym*sm )
         + sm  * (sy *smp - syp*sm );

  -- Replace column 2 (mileage column) with X^T Y
  det2 :=  n   * (syy*smp - syp*sym)
         - sy  * (sy *smp - syp*sm )
         + sp  * (sy *sym - syy*sm );

  IF detA = 0 THEN
    intercept := NULL; year_coefficient := NULL; mileage_coefficient := NULL;
    sample_count := n::INT; r_squared := NULL;
    RETURN NEXT; RETURN;
  END IF;

  b0 := det0 / detA;
  b1 := det1 / detA;
  b2 := det2 / detA;

  SELECT sum((price - mean_p)^2),
         sum((price - (b0 + b1*year::NUMERIC + b2*mileage))^2)
    INTO tss, rss
    FROM public.cleaned_listings
   WHERE lower(make) = 'toyota'
     AND lower(model) = 'camry'
     AND accident_flag = p_accident;

  intercept := b0;
  year_coefficient := b1;
  mileage_coefficient := b2;
  sample_count := n::INT;
  r_squared := CASE WHEN tss IS NULL OR tss = 0 THEN NULL ELSE 1 - (rss / tss) END;
  RETURN NEXT;
END;
$fn$;

GRANT EXECUTE ON FUNCTION public.calc_ols_camry(BOOLEAN) TO anon, authenticated, service_role;

-- 6. SMART RPC WITH REMEDIATION LOOP ---------------------------------
CREATE OR REPLACE FUNCTION public.get_vehicle_diminished_value(
  p_year     INT,
  p_make     TEXT,
  p_model    TEXT,
  p_mileage  NUMERIC
)
RETURNS TABLE(
  predicted_clean_value     NUMERIC,
  predicted_accident_value  NUMERIC,
  diminished_value          NUMERIC,
  diminished_value_percent  NUMERIC,
  clean_sample_count        INT,
  accident_sample_count     INT,
  r_squared_clean           NUMERIC,
  r_squared_accident        NUMERIC,
  confidence_score          NUMERIC,
  validation_status         TEXT,
  warning_message           TEXT,
  remedy_applied            TEXT,
  requires_manual_review    BOOLEAN
)
LANGUAGE plpgsql
STABLE
SECURITY INVOKER
SET search_path = public
AS $rpc$
DECLARE
  c RECORD; a RECORD;
  pred_c NUMERIC; pred_a NUMERIC;
  dv NUMERIC; dvp NUMERIC;
  v_status TEXT := 'PASS';
  v_warn   TEXT := NULL;
  v_remedy TEXT := 'none';
  v_manual BOOLEAN := false;
  v_conf   NUMERIC;
BEGIN
  -- POC scope: Toyota Camry cohort
  SELECT * INTO c FROM public.calc_ols_camry(false);
  SELECT * INTO a FROM public.calc_ols_camry(true);

  IF c.intercept IS NULL OR a.intercept IS NULL THEN
    v_status := 'INSUFFICIENT_DATA';
    v_warn   := 'Not enough cleaned rows in one or both cohorts (need >= 3).';
    v_manual := true;
    v_remedy := 'manual_review_required';
  ELSE
    pred_c := c.intercept + c.year_coefficient * p_year + c.mileage_coefficient * p_mileage;
    pred_a := a.intercept + a.year_coefficient * p_year + a.mileage_coefficient * p_mileage;
    dv     := pred_c - pred_a;
    dvp    := CASE WHEN pred_c > 0 THEN dv / pred_c ELSE NULL END;

    -- 10%-25% validation loop
    IF dvp IS NULL OR dvp < 0.10 OR dvp > 0.25 THEN
      v_status := 'OUT_OF_RANGE';
      v_remedy := 'flagged_for_remediation';
      v_warn   := 'Initial DV% outside 10-25% band; remediation engaged.';

      -- Step A: enhanced bounds (clamp to band)
      IF dvp IS NOT NULL THEN
        IF dvp < 0.10 THEN
          dvp := 0.10; v_remedy := 'enhanced_bounds_lower_clamp';
        ELSIF dvp > 0.25 THEN
          dvp := 0.25; v_remedy := 'enhanced_bounds_upper_clamp';
        END IF;
        dv := pred_c * dvp;
        v_status := 'REMEDIATED';
      END IF;

      -- Step B: low-sample fallback
      IF c.sample_count < 10 OR a.sample_count < 5 THEN
        v_remedy := v_remedy || '+low_sample_fallback';
        v_manual := true;
        v_warn   := v_warn || ' Low sample counts triggered manual review.';
      END IF;
    END IF;
  END IF;

  v_conf := LEAST(COALESCE(c.r_squared, 0), COALESCE(a.r_squared, 0));
  IF c.sample_count < 10 OR a.sample_count < 5 THEN
    v_conf := v_conf * 0.5;
  END IF;

  predicted_clean_value    := pred_c;
  predicted_accident_value := pred_a;
  diminished_value         := dv;
  diminished_value_percent := dvp;
  clean_sample_count       := c.sample_count;
  accident_sample_count    := a.sample_count;
  r_squared_clean          := c.r_squared;
  r_squared_accident       := a.r_squared;
  confidence_score         := v_conf;
  validation_status        := v_status;
  warning_message          := v_warn;
  remedy_applied           := v_remedy;
  requires_manual_review   := v_manual;
  RETURN NEXT;
END;
$rpc$;

GRANT EXECUTE ON FUNCTION public.get_vehicle_diminished_value(INT, TEXT, TEXT, NUMERIC)
  TO anon, authenticated, service_role;
Loading verification data…