Jump to content
Main menu
Main menu
move to sidebar
hide
Navigation
Main page
Recent changes
Random page
freem
Search
Search
Appearance
Create account
Log in
Personal tools
Create account
Log in
Pages for logged out editors
learn more
Contributions
Talk
Editing
Openai/6959ed27-accc-800e-8e87-21aa81e93c07
(section)
Add languages
Page
Discussion
English
Read
Edit
Edit source
View history
Tools
Tools
move to sidebar
hide
Actions
Read
Edit
Edit source
View history
General
What links here
Related changes
Special pages
Page information
Appearance
move to sidebar
hide
Warning:
You are not logged in. Your IP address will be publicly visible if you make any edits. If you
log in
or
create an account
, your edits will be attributed to your username, along with other benefits.
Anti-spam check. Do
not
fill this in!
/*
=== User: no I'm not looking for 30ns or less, this would run on a different environment so you'd need to run the whole benchmar… ===
no I'm not looking for 30ns or less, this would run on a different environment so you'd need to run the whole benchmark and make yourself a more optimized more efficient solution. that last run was on claude's container.
*/

// ellipse_benchmark_v2.cpp
// Updated with 0xfaded's suggestions:
// 1. Combine r/q into single sqrt
// 2. Remove the q < 1e-10 guard
// 3. Add extreme eccentricity cases (a=1, b=100)
// 4. Try std::hypot
//
// g++ -O3 -march=native -ffast-math -std=c++17 ellipse_benchmark_v2.cpp -o bench_v2 -lm

#include <cmath>
#include <cstdio>
#include <chrono>
#include <random>
#include <vector>
#include <algorithm>

using Clock = std::chrono::high_resolution_clock;

// 2*pi as a float. Used instead of the M_PI macro, which is not part of
// standard C++ and is unavailable on some toolchains without extra defines.
constexpr float kTwoPi = 6.28318530717958647692f;

// A 2D point (also used for the returned point on the ellipse).
struct Point { float x, y; };

// ============================================================================
// ORIGINAL: 0xfaded's curvature method (as benchmarked before)
// ============================================================================
// Parameters:
//   a, b   - divisors/scales for the x and y axes; the result is
//            (a*tx, b*ty) with (tx, ty) normalised to unit length, so it
//            satisfies (x/a)^2 + (y/b)^2 == 1 up to rounding.
//   px, py - query point. Only |px|, |py| are iterated on; the signs are
//            restored on return with std::copysign.
// Runs a fixed 3 iterations starting from tx = ty = 1/sqrt(2).
inline Point curvature_original(float a, float b, float px, float py) {
    float px_abs = std::fabs(px), py_abs = std::fabs(py);
    float tx = 0.70710678f, ty = 0.70710678f;
    float a2 = a*a, b2 = b*b;
    float ca = (a2-b2)/a, cb = (b2-a2)/b;

    for (int i = 0; i < 3; i++) {
        float x = a*tx, y = b*ty;
        float tx3 = tx*tx*tx, ty3 = ty*ty*ty;
        float ex = ca*tx3, ey = cb*ty3;
        float rx = x - ex, ry = y - ey;
        float qx = px_abs - ex, qy = py_abs - ey;
        float r = std::sqrt(rx*rx + ry*ry);
        float q = std::sqrt(qx*qx + qy*qy);
        // Guard against dividing by (near) zero when the query point
        // coincides with (ex, ey).
        if (q < 1e-10f) q = 1e-10f;
        tx = std::fmin(1.f, std::fmax(0.f, (qx * r/q + ex) / a));
        ty = std::fmin(1.f, std::fmax(0.f, (qy * r/q + ey) / b));
        // Renormalise (tx, ty) to unit length.
        float t = std::sqrt(tx*tx + ty*ty);
        tx /= t; ty /= t;
    }
    return {std::copysign(a*tx, px), std::copysign(b*ty, py)};
}

// ============================================================================
// OPTIMIZED: Combined r/q into single sqrt, removed guard
// ============================================================================
// Same interface and iteration as curvature_original, except r/q is computed
// as one sqrt of a quotient and there is no lower clamp on q. If
// qx*qx + qy*qy is exactly zero the quotient is a division by zero.
inline Point curvature_optimized(float a, float b, float px, float py) {
    float px_abs = std::fabs(px), py_abs = std::fabs(py);
    float tx = 0.70710678f, ty = 0.70710678f;
    float a2 = a*a, b2 = b*b;
    float ca = (a2-b2)/a, cb = (b2-a2)/b;

    for (int i = 0; i < 3; i++) {
        float x = a*tx, y = b*ty;
        float tx3 = tx*tx*tx, ty3 = ty*ty*ty;
        float ex = ca*tx3, ey = cb*ty3;
        float rx = x - ex, ry = y - ey;
        float qx = px_abs - ex, qy = py_abs - ey;
        // Combined: r/q in one sqrt
        float rq = std::sqrt((rx*rx + ry*ry) / (qx*qx + qy*qy));
        tx = std::fmin(1.f, std::fmax(0.f, (qx * rq + ex) / a));
        ty = std::fmin(1.f, std::fmax(0.f, (qy * rq + ey) / b));
        float t = std::sqrt(tx*tx + ty*ty);
        tx /= t; ty /= t;
    }
    return {std::copysign(a*tx, px), std::copysign(b*ty, py)};
}

// ============================================================================
// HYPOT version: using std::hypot
// ============================================================================
// Same interface and iteration as curvature_original, with every
// sqrt(u*u + v*v) replaced by std::hypot(u, v) and no lower clamp on q.
inline Point curvature_hypot(float a, float b, float px, float py) {
    float px_abs = std::fabs(px), py_abs = std::fabs(py);
    float tx = 0.70710678f, ty = 0.70710678f;
    float a2 = a*a, b2 = b*b;
    float ca = (a2-b2)/a, cb = (b2-a2)/b;

    for (int i = 0; i < 3; i++) {
        float x = a*tx, y = b*ty;
        float tx3 = tx*tx*tx, ty3 = ty*ty*ty;
        float ex = ca*tx3, ey = cb*ty3;
        float rx = x - ex, ry = y - ey;
        float qx = px_abs - ex, qy = py_abs - ey;
        float r = std::hypot(rx, ry);
        float q = std::hypot(qx, qy);
        tx = std::fmin(1.f, std::fmax(0.f, (qx * r/q + ex) / a));
        ty = std::fmin(1.f, std::fmax(0.f, (qy * r/q + ey) / b));
        float t = std::hypot(tx, ty);
        tx /= t; ty /= t;
    }
    return {std::copysign(a*tx, px), std::copysign(b*ty, py)};
}

// ============================================================================
// Claude's rotation trick (for comparison)
// ============================================================================
// Same interface as the curvature_* functions. Tracks a unit vector (c, s)
// and returns (a*c, b*s) with the signs of px, py. Up to 4 iterations; each
// computes dt = f/fp, nudges (c, s) by dt along (s, -c) and renormalises.
// Stops early when |fp| < 1e-10.
inline Point newton_rotation(float a, float b, float px, float py) {
    float px_abs = std::fabs(px), py_abs = std::fabs(py);
    float a2mb2 = a*a - b*b;
    float nx = px_abs/a, ny = py_abs/b;
    // The 1e-10 term keeps len non-zero when px == py == 0.
    float len = std::sqrt(nx*nx + ny*ny + 1e-10f);
    float c = nx/len, s = ny/len;

    for (int i = 0; i < 4; i++) {
        float f = a2mb2*s*c - px_abs*a*s + py_abs*b*c;
        float fp = a2mb2*(c*c - s*s) - px_abs*a*c - py_abs*b*s;
        if (std::fabs(fp) < 1e-10f) break;
        float dt = f/fp;
        float nc = c + dt*s, ns = s - dt*c;
        len = std::sqrt(nc*nc + ns*ns);
        c = nc/len; s = ns/len;
    }
    return {std::copysign(a*c, px), std::copysign(b*s, py)};
}

// ============================================================================
// Benchmark
// ============================================================================

// Results are written to a volatile so the calls under test have an
// observable side effect and cannot be discarded by the optimiser.
volatile float sink;
void escape(Point p) { sink = p.x + p.y; }

// Times fn over every point in pts and returns the median, across `runs`
// timed passes, of the per-point cost in nanoseconds. Three untimed warm-up
// passes run first. pts must be non-empty (the per-pass time is divided by
// pts.size()).
template<typename F>
double bench(F fn, float a, float b, const std::vector<Point>& pts, int runs = 10) {
    // Warmup
    for (int w = 0; w < 3; w++)
        for (const auto& p : pts) escape(fn(a, b, p.x, p.y));

    std::vector<double> times;
    if (runs > 0) times.reserve(static_cast<std::size_t>(runs));
    for (int r = 0; r < runs; r++) {
        auto t0 = Clock::now();
        for (const auto& p : pts) escape(fn(a, b, p.x, p.y));
        times.push_back(
            std::chrono::duration<double,std::nano>(Clock::now()-t0).count() / pts.size());
    }
    std::sort(times.begin(), times.end());
    return times[times.size()/2];
}

// Returns the largest |(x/a)^2 + (y/b)^2 - 1| over all results of fn on pts,
// i.e. how far the returned points are from satisfying the ellipse equation.
// NaN/Inf residuals are skipped rather than reported.
// NOTE(review): the documented build line uses -ffast-math, under which the
// compiler is allowed to assume no NaN/Inf and may drop this filter - confirm
// on the target toolchain.
float accuracy(float a, float b, const std::vector<Point>& pts,
               Point (*fn)(float,float,float,float)) {
    float maxe = 0;
    for (const auto& p : pts) {
        Point r = fn(a, b, p.x, p.y);
        float e = std::fabs((r.x/a)*(r.x/a) + (r.y/b)*(r.y/b) - 1.f);
        if (!std::isnan(e) && !std::isinf(e)) maxe = std::fmax(maxe, e);
    }
    return maxe;
}

// For each ellipse configuration: generates N random points (fixed seed 42)
// at 0.5x-2.5x the ellipse scale, times all four implementations, and prints
// timing plus the accuracy() residual for each.
int main() {
    printf("=========================================================\n");
    printf(" BENCHMARK v2: With 0xfaded's suggested optimizations\n");
    printf("=========================================================\n\n");

    std::mt19937 rng(42);
    std::uniform_real_distribution<float> angle(0.0f, kTwoPi);
    std::uniform_real_distribution<float> radius(0.5f, 2.5f);

    const int N = 50000;

    struct Cfg { float a, b; const char* name; } cfgs[] = {
        {150, 100, "Moderate (150, 100)"},
        {200, 50, "High ecc (200, 50)"},
        {100, 10, "Very high (100, 10)"},
        {1, 100, "Extreme (1, 100)"},    // 0xfaded's suggestion
        {100, 1, "Extreme (100, 1)"},    // 0xfaded's suggestion
        {1, 1000, "Extreme (1, 1000)"},  // Even more extreme
        {100, 100, "Circle (100, 100)"},
    };

    for (auto& cfg : cfgs) {
        std::vector<Point> pts(N);
        for (int i = 0; i < N; i++) {
            float ang = angle(rng), r = radius(rng);
            pts[i] = {cfg.a * r * std::cos(ang), cfg.b * r * std::sin(ang)};
        }

        double t_orig = bench(curvature_original, cfg.a, cfg.b, pts);
        double t_opt = bench(curvature_optimized, cfg.a, cfg.b, pts);
        double t_hyp = bench(curvature_hypot, cfg.a, cfg.b, pts);
        double t_rot = bench(newton_rotation, cfg.a, cfg.b, pts);

        printf("%-25s\n", cfg.name);
        printf(" original %6.1f ns acc: %.1e\n", t_orig,
               accuracy(cfg.a, cfg.b, pts, curvature_original));
        printf(" optimized %6.1f ns acc: %.1e (%.0f%% of original)\n", t_opt,
               accuracy(cfg.a, cfg.b, pts, curvature_optimized), 100*t_opt/t_orig);
        printf(" hypot %6.1f ns acc: %.1e (%.0f%% of original)\n", t_hyp,
               accuracy(cfg.a, cfg.b, pts, curvature_hypot), 100*t_hyp/t_orig);
        printf(" rotation %6.1f ns acc: %.1e\n", t_rot,
               accuracy(cfg.a, cfg.b, pts, newton_rotation));
        printf("\n");
    }

    printf("=========================================================\n");
    printf(" SUMMARY\n");
    printf("=========================================================\n");
    printf(" original = 0xfaded's code as I benchmarked it\n");
    printf(" optimized = combined r/q into single sqrt, no guard\n");
    printf(" hypot = using std::hypot instead of manual sqrt\n");
    printf(" rotation = Claude's sin/cos rotation trick\n");
    printf("=========================================================\n");

    return 0;
}
Summary:
Please note that all contributions to freem are considered to be released under the Creative Commons Attribution-ShareAlike 4.0 (see
Freem:Copyrights
for details). If you do not want your writing to be edited mercilessly and redistributed at will, then do not submit it here.
You are also promising us that you wrote this yourself, or copied it from a public domain or similar free resource.
Do not submit copyrighted work without permission!
Cancel
Editing help
(opens in new window)