
wwcd2016
@wwcd2016
-
AI生成了一个LLM GPU显存计算器 -
双3090 + Ollama 加载 Q8 视觉模型瞬间断电重启,求老哥们把脉 - 我用利民全日系的 850W 电源(具体型号 TR-SP850),插了 2 个 3090 也没有崩。不过我仅仅是测试了一下,长期跑也没有必要。
长期跑必须sp1000以上才靠谱。
电源是根,必须用好品牌,什么航嘉,长城绝对不行的。 -
AI生成了一个LLM GPU显存计算器 - deepseekv4 pro 生成:
<!DOCTYPE html>
<html lang="zh-CN">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>LLM 显存计算器 · GPU内存需求估算</title>
  <style>
    /* ---------- Reset and design tokens ---------- */
    * { box-sizing: border-box; margin: 0; padding: 0; }

    :root {
      --bg: #080c14;
      --bg2: #0e1420;
      --card: rgba(255,255,255,0.04);
      --card-border: rgba(255,255,255,0.08);
      --accent: #6366f1;
      --accent2: #a78bfa;
      --green: #22d3a0;
      --yellow: #f59e0b;
      --red: #f43f5e;
      --text: #e2e8f0;
      --muted: #64748b;
    }

    body {
      font-family: 'Segoe UI', system-ui, sans-serif;
      background: var(--bg);
      color: var(--text);
      min-height: 100vh;
      padding: 32px 16px;
    }

    .page { max-width: 960px; margin: 0 auto; }

    /* ---------- Page header ---------- */
    header { text-align: center; margin-bottom: 40px; }

    header h1 {
      font-size: 2.4rem;
      font-weight: 700;
      letter-spacing: -0.5px;
      background: linear-gradient(135deg, #818cf8, #c084fc, #38bdf8);
      /* Prefixed and standard property together: the gradient text needs one of them. */
      -webkit-background-clip: text;
      background-clip: text;
      -webkit-text-fill-color: transparent;
      margin-bottom: 8px;
    }

    header p { color: var(--muted); font-size: 0.95rem; }

    /* ---------- Layout grid and cards ---------- */
    .grid { display: grid; grid-template-columns: 1fr 1fr; gap: 20px; }

    @media (max-width: 700px) {
      .grid { grid-template-columns: 1fr; }
    }

    .card {
      background: var(--card);
      border: 1px solid var(--card-border);
      border-radius: 16px;
      padding: 24px;
      backdrop-filter: blur(8px);
    }

    .card h2 {
      font-size: 0.8rem;
      font-weight: 600;
      letter-spacing: 0.08em;
      text-transform: uppercase;
      color: var(--muted);
      margin-bottom: 20px;
    }

    /* Card that spans both grid columns. */
    .card-wide { grid-column: 1 / -1; }

    /* Sub-section stacked below another block inside a card. */
    .card-section { margin-top: 20px; }

    /* ---------- Sliders ---------- */
    .slider-group { margin-bottom: 22px; }
    .slider-group:last-child { margin-bottom: 0; }

    .slider-header {
      display: flex;
      justify-content: space-between;
      align-items: baseline;
      margin-bottom: 8px;
    }

    .slider-label { font-size: 0.88rem; color: #94a3b8; }

    .slider-value {
      font-size: 1rem;
      font-weight: 700;
      color: #c4b5fd;
      font-variant-numeric: tabular-nums;
      transition: color 0.2s;
    }

    input[type=range] {
      -webkit-appearance: none;
      appearance: none;
      width: 100%;
      height: 6px;
      border-radius: 3px;
      background: rgba(255,255,255,0.1);
      outline: none;
      cursor: pointer;
    }

    /* The default outline is removed above, so give keyboard users a visible focus ring. */
    input[type=range]:focus-visible {
      outline: 2px solid var(--accent2);
      outline-offset: 4px;
    }

    input[type=range]::-webkit-slider-thumb {
      -webkit-appearance: none;
      appearance: none;
      width: 18px;
      height: 18px;
      border-radius: 50%;
      background: linear-gradient(135deg, #6366f1, #a78bfa);
      cursor: pointer;
      transition: transform 0.15s, box-shadow 0.15s;
      box-shadow: 0 0 0 3px rgba(99,102,241,0.25);
    }

    input[type=range]::-webkit-slider-thumb:hover {
      transform: scale(1.2);
      box-shadow: 0 0 0 5px rgba(99,102,241,0.35);
    }

    input[type=range]::-moz-range-thumb {
      width: 18px;
      height: 18px;
      border-radius: 50%;
      border: none;
      background: linear-gradient(135deg, #6366f1, #a78bfa);
      cursor: pointer;
    }

    .tick-row { display: flex; justify-content: space-between; margin-top: 4px; }
    .tick { font-size: 0.65rem; color: var(--muted); }

    /* ---------- Result card ---------- */
    .result-card {
      background: linear-gradient(135deg, rgba(99,102,241,0.12), rgba(167,139,250,0.08));
      border: 1px solid rgba(99,102,241,0.3);
      border-radius: 16px;
      padding: 28px;
      text-align: center;
      grid-column: 1 / -1;
      position: relative;
      overflow: hidden;
    }

    .result-card::before {
      content: '';
      position: absolute;
      top: -60px;
      right: -60px;
      width: 200px;
      height: 200px;
      border-radius: 50%;
      background: radial-gradient(circle, rgba(99,102,241,0.15), transparent 70%);
      pointer-events: none;
    }

    .vram-total {
      font-size: 4rem;
      font-weight: 800;
      letter-spacing: -2px;
      font-variant-numeric: tabular-nums;
      background: linear-gradient(135deg, #818cf8, #c084fc);
      -webkit-background-clip: text;
      background-clip: text;
      -webkit-text-fill-color: transparent;
      line-height: 1;
      margin: 8px 0 4px;
      transition: all 0.3s;
    }

    .vram-unit { font-size: 1.3rem; font-weight: 500; color: var(--muted); }

    .vram-label {
      font-size: 0.8rem;
      letter-spacing: 0.1em;
      text-transform: uppercase;
      color: var(--muted);
    }

    /* ---------- Breakdown bars ---------- */
    .breakdown { margin-top: 24px; }

    .breakdown-row { display: flex; align-items: center; gap: 10px; margin-bottom: 10px; }

    .breakdown-dot { width: 10px; height: 10px; border-radius: 50%; flex-shrink: 0; }

    .breakdown-name {
      font-size: 0.82rem;
      color: #94a3b8;
      width: 130px;
      flex-shrink: 0;
      text-align: left;
    }

    .breakdown-bar-wrap {
      flex: 1;
      height: 8px;
      background: rgba(255,255,255,0.06);
      border-radius: 4px;
      overflow: hidden;
    }

    /* Width starts at 0 and is set from script on every update. */
    .breakdown-bar {
      width: 0;
      height: 100%;
      border-radius: 4px;
      transition: width 0.4s cubic-bezier(0.4,0,0.2,1);
    }

    .breakdown-gb {
      font-size: 0.82rem;
      font-weight: 600;
      font-variant-numeric: tabular-nums;
      min-width: 70px;
      text-align: right;
      color: var(--text);
    }

    /* Per-component colours, shared by the dot, the bar and the formula legend. */
    .is-weights { background: #818cf8; }
    .is-kv { background: #34d399; }
    .is-overhead { background: #f59e0b; }

    .term-weights { color: #c4b5fd; }
    .term-kv { color: #34d399; }
    .term-overhead { color: #f59e0b; }

    /* ---------- GPU compatibility tiles ---------- */
    .gpu-grid {
      display: grid;
      grid-template-columns: repeat(auto-fill, minmax(140px, 1fr));
      gap: 10px;
      margin-top: 8px;
    }

    .gpu-tile {
      background: rgba(255,255,255,0.03);
      border: 1px solid var(--card-border);
      border-radius: 10px;
      padding: 10px 12px;
      display: flex;
      flex-direction: column;
      gap: 4px;
      transition: border-color 0.25s, background 0.25s, opacity 0.25s;
    }

    .gpu-tile.fits { border-color: rgba(34,211,160,0.4); background: rgba(34,211,160,0.06); }
    .gpu-tile.tight { border-color: rgba(245,158,11,0.4); background: rgba(245,158,11,0.06); }
    .gpu-tile.nope { opacity: 0.35; }

    .gpu-name { font-size: 0.75rem; font-weight: 600; color: #cbd5e1; }
    .gpu-mem { font-size: 0.72rem; color: var(--muted); }

    .gpu-status {
      font-size: 0.68rem;
      font-weight: 700;
      letter-spacing: 0.05em;
      margin-top: 2px;
    }

    .gpu-tile.fits .gpu-status { color: var(--green); }
    .gpu-tile.tight .gpu-status { color: var(--yellow); }
    .gpu-tile.nope .gpu-status { color: var(--muted); }

    /* ---------- Estimated architecture ---------- */
    .arch-grid {
      display: grid;
      grid-template-columns: repeat(4, 1fr);
      gap: 12px;
      margin-top: 4px;
    }

    @media (max-width: 500px) {
      .arch-grid { grid-template-columns: repeat(2, 1fr); }
    }

    .arch-item {
      background: rgba(255,255,255,0.03);
      border: 1px solid var(--card-border);
      border-radius: 10px;
      padding: 10px 14px;
      text-align: center;
    }

    .arch-val { font-size: 1.1rem; font-weight: 700; color: #818cf8; }

    .arch-key {
      font-size: 0.68rem;
      color: var(--muted);
      text-transform: uppercase;
      letter-spacing: 0.06em;
      margin-top: 2px;
    }

    /* ---------- Formula notes ---------- */
    .formula-note {
      background: rgba(255,255,255,0.02);
      border: 1px solid var(--card-border);
      border-radius: 10px;
      padding: 12px 16px;
      margin-top: 8px;
      font-size: 0.78rem;
      color: var(--muted);
      line-height: 1.6;
    }

    .formula-note code {
      background: rgba(255,255,255,0.07);
      padding: 1px 5px;
      border-radius: 4px;
      font-family: 'JetBrains Mono', 'Fira Code', monospace;
      font-size: 0.85em;
      color: #c4b5fd;
    }

    /* ---------- Brief scale pulse on the total when it changes ---------- */
    @keyframes pop {
      0% { transform: scale(1); }
      50% { transform: scale(1.05); }
      100% { transform: scale(1); }
    }

    .pop { animation: pop 0.25s ease-out; }
  </style>
</head>
<body>
  <div class="page">
    <header>
      <h1>LLM 显存计算器</h1>
      <p>估算大语言模型推理所需的 GPU 显存</p>
    </header>

    <main class="grid">
      <!-- Left column: input controls -->
      <div class="card">
        <h2>模型参数</h2>

        <!-- Model size (the step list was extended with 20B and 27B) -->
        <div class="slider-group">
          <div class="slider-header">
            <label class="slider-label" for="sl-model">模型大小</label>
            <span class="slider-value" id="val-model">7B</span>
          </div>
          <input type="range" id="sl-model" min="0" max="15" step="1" value="4">
          <div class="tick-row">
            <span class="tick">0.5B</span><span class="tick">7B</span>
            <span class="tick">70B</span><span class="tick">671B</span>
          </div>
        </div>

        <!-- Weight quantization -->
        <div class="slider-group">
          <div class="slider-header">
            <label class="slider-label" for="sl-quant">权重量化</label>
            <span class="slider-value" id="val-quant">FP16 (16-bit)</span>
          </div>
          <input type="range" id="sl-quant" min="0" max="7" step="1" value="6">
          <div class="tick-row">
            <span class="tick">2-bit</span><span class="tick">Q4</span>
            <span class="tick">Q8</span><span class="tick">FP32</span>
          </div>
        </div>

        <!-- Context length -->
        <div class="slider-group">
          <div class="slider-header">
            <label class="slider-label" for="sl-ctx">上下文长度</label>
            <span class="slider-value" id="val-ctx">4K tokens</span>
          </div>
          <input type="range" id="sl-ctx" min="0" max="11" step="1" value="3">
          <div class="tick-row">
            <span class="tick">512</span><span class="tick">4K</span>
            <span class="tick">128K</span><span class="tick">1M</span>
          </div>
        </div>

        <!-- KV cache quantization -->
        <div class="slider-group">
          <div class="slider-header">
            <label class="slider-label" for="sl-kv">KV Cache 量化</label>
            <span class="slider-value" id="val-kv">FP16 (16-bit)</span>
          </div>
          <input type="range" id="sl-kv" min="0" max="2" step="1" value="2">
          <div class="tick-row">
            <span class="tick">4-bit</span><span class="tick">8-bit</span>
            <span class="tick">FP16</span>
          </div>
        </div>

        <!-- Batch size -->
        <div class="slider-group">
          <div class="slider-header">
            <label class="slider-label" for="sl-batch">批量大小</label>
            <span class="slider-value" id="val-batch">1</span>
          </div>
          <input type="range" id="sl-batch" min="0" max="5" step="1" value="0">
          <div class="tick-row">
            <span class="tick">1</span><span class="tick">4</span>
            <span class="tick">16</span><span class="tick">32</span>
          </div>
        </div>
      </div>

      <!-- Right column: estimated architecture and formulas -->
      <div class="card">
        <h2>估算架构</h2>
        <!-- The numbers below are placeholders; the script overwrites them on load. -->
        <div class="arch-grid">
          <div class="arch-item"><div class="arch-val" id="arch-layers">32</div><div class="arch-key">层数</div></div>
          <div class="arch-item"><div class="arch-val" id="arch-hidden">4096</div><div class="arch-key">隐藏维度</div></div>
          <div class="arch-item"><div class="arch-val" id="arch-kv-heads">8</div><div class="arch-key">KV 头数</div></div>
          <div class="arch-item"><div class="arch-val" id="arch-head-dim">128</div><div class="arch-key">头维度</div></div>
        </div>

        <div class="card-section">
          <h2>显存公式</h2>
          <div class="formula-note">
            <b class="term-weights">模型权重</b> = 参数量 × 每参数字节数<br>
            <b class="term-kv">KV Cache</b> = 长度 × 批量 × 2 × 层数 × kv_heads × head_dim × kv字节数<br>
            <b class="term-overhead">运行时开销</b> = (权重 + KV cache) × 10%<br><br>
            <code>总显存 = 权重 + KV Cache + 开销</code>
          </div>
        </div>

        <div class="card-section">
          <h2>量化字节/参数量</h2>
          <div class="formula-note">
            2-bit <code>0.25 B</code> · 3-bit <code>0.375 B</code> · 4-bit <code>0.5 B</code> · 5-bit <code>0.625 B</code> · 6-bit <code>0.75 B</code> · 8-bit / Q8 <code>1 B</code> · FP16 / BF16 <code>2 B</code> · FP32 <code>4 B</code>
          </div>
        </div>
      </div>

      <!-- Full width: total VRAM result -->
      <div class="result-card">
        <div class="vram-label">所需显存</div>
        <div class="vram-total" id="vram-total">0.00</div>
        <div class="vram-unit">GB</div>
        <div class="breakdown" id="breakdown">
          <div class="breakdown-row">
            <div class="breakdown-dot is-weights"></div>
            <div class="breakdown-name">模型权重</div>
            <div class="breakdown-bar-wrap"><div class="breakdown-bar is-weights" id="bar-weights"></div></div>
            <div class="breakdown-gb" id="gb-weights">0.00 GB</div>
          </div>
          <div class="breakdown-row">
            <div class="breakdown-dot is-kv"></div>
            <div class="breakdown-name">KV Cache</div>
            <div class="breakdown-bar-wrap"><div class="breakdown-bar is-kv" id="bar-kv"></div></div>
            <div class="breakdown-gb" id="gb-kv">0.00 GB</div>
          </div>
          <div class="breakdown-row">
            <div class="breakdown-dot is-overhead"></div>
            <div class="breakdown-name">运行时开销</div>
            <div class="breakdown-bar-wrap"><div class="breakdown-bar is-overhead" id="bar-oh"></div></div>
            <div class="breakdown-gb" id="gb-oh">0.00 GB</div>
          </div>
        </div>
      </div>

      <!-- Full width: GPU compatibility -->
      <div class="card card-wide">
        <h2>GPU 兼容性</h2>
        <div class="gpu-grid" id="gpu-grid"></div>
      </div>
    </main>
  </div>

  <script>
    // ---------- Slider step tables ----------
    // Each slider stores an index into one of these arrays, not the value itself.

    // Model sizes; `b` is the parameter count in billions (20B and 27B were added later).
    const MODEL_STEPS = [
      { label: '0.5B', b: 0.5 },
      { label: '1B', b: 1 },
      { label: '1.5B', b: 1.5 },
      { label: '3B', b: 3 },
      { label: '7B', b: 7 },
      { label: '8B', b: 8 },
      { label: '13B', b: 13 },
      { label: '20B', b: 20 },
      { label: '27B', b: 27 },
      { label: '30B', b: 30 },
      { label: '34B', b: 34 },
      { label: '70B', b: 70 },
      { label: '72B', b: 72 },
      { label: '120B', b: 120 },
      { label: '405B', b: 405 },
      { label: '671B', b: 671 }
    ];

    // Weight quantization; `bpw` is bytes per parameter.
    const QUANT_STEPS = [
      { label: '2-bit', bpw: 0.25 },
      { label: '3-bit', bpw: 0.375 },
      { label: 'Q4 (4-bit)', bpw: 0.5 },
      { label: 'Q5 (5-bit)', bpw: 0.625 },
      { label: 'Q6 (6-bit)', bpw: 0.75 },
      { label: 'Q8 (8-bit)', bpw: 1.0 },
      { label: 'FP16 (16-bit)', bpw: 2.0 },
      { label: 'FP32 (32-bit)', bpw: 4.0 }
    ];

    // Context lengths; `v` is the token count.
    const CTX_STEPS = [
      { label: '512', v: 512 },
      { label: '1K', v: 1024 },
      { label: '2K', v: 2048 },
      { label: '4K', v: 4096 },
      { label: '8K', v: 8192 },
      { label: '16K', v: 16384 },
      { label: '32K', v: 32768 },
      { label: '64K', v: 65536 },
      { label: '128K', v: 131072 },
      { label: '256K', v: 262144 },
      { label: '512K', v: 524288 },
      { label: '1M', v: 1048576 }
    ];

    // KV cache quantization; `bpw` is bytes per cached element.
    const KV_STEPS = [
      { label: '4-bit', bpw: 0.5 },
      { label: 'Q8 (8-bit)', bpw: 1.0 },
      { label: 'FP16 (16-bit)', bpw: 2.0 }
    ];

    const BATCH_STEPS = [1, 2, 4, 8, 16, 32];

    // GPUs (and multi-GPU setups) listed in the compatibility grid; `vram` is in GB.
    const GPUS = [
      { name: 'RTX 3060', vram: 12 },
      { name: 'RTX 3090', vram: 24 },
      { name: 'RTX 4070', vram: 12 },
      { name: 'RTX 4090', vram: 24 },
      { name: 'RTX 5090', vram: 32 },
      { name: 'A10', vram: 24 },
      { name: 'A100 40G', vram: 40 },
      { name: 'A100 80G', vram: 80 },
      { name: 'H100 80G', vram: 80 },
      { name: 'H100 NVL', vram: 94 },
      { name: 'H200', vram: 141 },
      { name: 'B200', vram: 192 },
      { name: '2× H100', vram: 160 },
      { name: '4× H100', vram: 320 },
      { name: '8× H100', vram: 640 },
      { name: '8× B200', vram: 1536 }
    ];

    // Slider element id -> the step table it indexes. Single source of truth for
    // both event binding and each slider's `max`.
    const SLIDER_STEPS = {
      'sl-model': MODEL_STEPS,
      'sl-quant': QUANT_STEPS,
      'sl-ctx': CTX_STEPS,
      'sl-kv': KV_STEPS,
      'sl-batch': BATCH_STEPS
    };

    const BYTES_PER_GB = 1024 ** 3;

    /**
     * Estimate a transformer architecture from the parameter count
     * (formula identical to the first version of this tool).
     * @param {number} b Parameter count in billions.
     * @returns {{layers: number, hidden: number, kvHeads: number, headDim: number}}
     */
    function estimateArch(b) {
      const layers = Math.max(8, Math.round(14 * Math.pow(b, 0.30)));
      // Hidden size is rounded to a multiple of 64; it is displayed only and
      // does not enter the VRAM formula.
      const hidden = Math.round(2048 * Math.pow(b, 0.285) / 64) * 64;
      const kvHeads = b >= 200 ? 16 : 8;
      const headDim = 128;
      return { layers, hidden, kvHeads, headDim };
    }

    /**
     * Return the step entry currently selected by a slider.
     * The index is clamped so that a slider attribute out of sync with the
     * table can never yield `undefined`.
     * @param {string} id Slider element id.
     * @returns {*} The selected entry of the slider's step table.
     */
    function pickStep(id) {
      const steps = SLIDER_STEPS[id];
      const raw = Number(document.getElementById(id).value);
      const idx = Number.isFinite(raw) ? Math.trunc(raw) : 0;
      return steps[Math.min(Math.max(idx, 0), steps.length - 1)];
    }

    /**
     * Read all five sliders once.
     * @returns {{model: object, quant: object, ctx: object, kv: object, batch: number}}
     */
    function readSelection() {
      return {
        model: pickStep('sl-model'),
        quant: pickStep('sl-quant'),
        ctx: pickStep('sl-ctx'),
        kv: pickStep('sl-kv'),
        batch: pickStep('sl-batch')
      };
    }

    /**
     * Core calculation: weights + KV cache + 10% runtime overhead.
     * Byte counts are divided by 1024^3 and shown with the unit "GB".
     * @returns {{weightsGB: number, kvGB: number, overheadGB: number, totalGB: number, arch: object}}
     */
    function calculate() {
      const { model, quant, ctx, kv, batch } = readSelection();
      const arch = estimateArch(model.b);

      const weightsGB = model.b * 1e9 * quant.bpw / BYTES_PER_GB;
      // Factor 2 accounts for storing both keys and values in every layer.
      const kvGB = 2 * arch.layers * arch.kvHeads * arch.headDim * ctx.v * batch * kv.bpw / BYTES_PER_GB;
      const overheadGB = (weightsGB + kvGB) * 0.10;
      const totalGB = weightsGB + kvGB + overheadGB;

      return { weightsGB, kvGB, overheadGB, totalGB, arch };
    }

    /**
     * Format a GB figure: 2 decimals below 10, 1 decimal below 100, integer otherwise.
     * @param {number} n
     * @returns {string}
     */
    function fmt(n) {
      if (n < 10) return n.toFixed(2);
      if (n < 100) return n.toFixed(1);
      return Math.round(n).toString();
    }

    /**
     * Show a slider's human-readable value next to it and mirror it into
     * `aria-valuetext`, so assistive technology announces e.g. "7B" instead of
     * the raw slider index.
     */
    function setSliderText(sliderId, valueId, text) {
      document.getElementById(valueId).textContent = text;
      document.getElementById(sliderId).setAttribute('aria-valuetext', text);
    }

    /** Refresh the value captions of all five sliders. */
    function renderLabels() {
      const { model, quant, ctx, kv, batch } = readSelection();
      setSliderText('sl-model', 'val-model', model.label);
      setSliderText('sl-quant', 'val-quant', quant.label);
      setSliderText('sl-ctx', 'val-ctx', ctx.label + ' tokens');
      setSliderText('sl-kv', 'val-kv', kv.label);
      setSliderText('sl-batch', 'val-batch', String(batch));
    }

    /** Write the estimated architecture figures into the architecture card. */
    function renderArch(arch) {
      document.getElementById('arch-layers').textContent = arch.layers;
      document.getElementById('arch-hidden').textContent = arch.hidden.toLocaleString();
      document.getElementById('arch-kv-heads').textContent = arch.kvHeads;
      document.getElementById('arch-head-dim').textContent = arch.headDim;
    }

    /** Render the total and the three breakdown rows (text and bar widths). */
    function renderResult({ weightsGB, kvGB, overheadGB, totalGB }) {
      const totalEl = document.getElementById('vram-total');
      totalEl.textContent = fmt(totalGB);

      // Restart the pop animation: drop the class, force a reflow, add it back.
      totalEl.classList.remove('pop');
      void totalEl.offsetWidth;
      totalEl.classList.add('pop');

      // Bars are scaled relative to the largest component; the 0.01 floor
      // avoids dividing by zero.
      const maxBar = Math.max(weightsGB, kvGB, overheadGB, 0.01);
      const rows = [
        ['gb-weights', 'bar-weights', weightsGB],
        ['gb-kv', 'bar-kv', kvGB],
        ['gb-oh', 'bar-oh', overheadGB]
      ];
      rows.forEach(([textId, barId, gb]) => {
        document.getElementById(textId).textContent = fmt(gb) + ' GB';
        document.getElementById(barId).style.width = (gb / maxBar * 100) + '%';
      });
    }

    /**
     * Rebuild the GPU compatibility grid.
     * A GPU "fits" up to 85% utilisation, is "tight" up to 100%, otherwise insufficient.
     * @param {number} totalGB Required VRAM.
     */
    function renderGPUs(totalGB) {
      const grid = document.getElementById('gpu-grid');
      const tiles = document.createDocumentFragment();

      // Build one child <div> with the given class and text.
      const addLine = (parent, className, text) => {
        const line = document.createElement('div');
        line.className = className;
        line.textContent = text;
        parent.appendChild(line);
      };

      GPUS.forEach(gpu => {
        const ratio = totalGB / gpu.vram;
        let cls, status;
        if (ratio <= 0.85) { cls = 'fits'; status = '✓ 够用'; }
        else if (ratio <= 1.0) { cls = 'tight'; status = '⚠ 紧张'; }
        else { cls = 'nope'; status = '✗ 不足'; }

        // The displayed percentage is capped at 100.
        const pct = Math.min(100, Math.round(ratio * 100));

        const tile = document.createElement('div');
        tile.className = `gpu-tile ${cls}`;
        addLine(tile, 'gpu-name', gpu.name);
        addLine(tile, 'gpu-mem', `${gpu.vram} GB`);
        addLine(tile, 'gpu-status', `${status} (${pct}%)`);
        tiles.appendChild(tile);
      });

      grid.textContent = '';
      grid.appendChild(tiles);
    }

    /** Recompute everything from the current slider positions and redraw. */
    function update() {
      renderLabels();
      const result = calculate();
      renderArch(result.arch);
      renderResult(result);
      renderGPUs(result.totalGB);
    }

    // Bind slider events. `max` is derived from the step tables so the markup
    // cannot drift out of sync when a table gains or loses entries.
    Object.entries(SLIDER_STEPS).forEach(([id, steps]) => {
      const slider = document.getElementById(id);
      slider.max = String(steps.length - 1);
      slider.addEventListener('input', update);
    });

    update();
  </script>
</body>
</html>