<!-- Generated by DeepSeek V4 Pro -->
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>LLM 显存计算器 · GPU内存需求估算</title>
<style>
/* ---------- Reset ---------- */
* {
  box-sizing: border-box;
  margin: 0;
  padding: 0;
}

/* ---------- Colour tokens shared by the whole page ---------- */
:root {
  --bg: #080c14;
  --bg2: #0e1420;
  --card: rgba(255,255,255,0.04);
  --card-border: rgba(255,255,255,0.08);
  --accent: #6366f1;
  --accent2: #a78bfa;
  --green: #22d3a0;
  --yellow: #f59e0b;
  --red: #f43f5e;
  --text: #e2e8f0;
  --muted: #64748b;
}

/* ---------- Page shell ---------- */
body {
  font-family: 'Segoe UI', system-ui, sans-serif;
  background: var(--bg);
  color: var(--text);
  min-height: 100vh;
  padding: 32px 16px;
}

.page {
  max-width: 960px;
  margin: 0 auto;
}

/* ---------- Heading with gradient-filled title ---------- */
header {
  text-align: center;
  margin-bottom: 40px;
}

header h1 {
  font-size: 2.4rem;
  font-weight: 700;
  letter-spacing: -0.5px;
  background: linear-gradient(135deg, #818cf8, #c084fc, #38bdf8);
  -webkit-background-clip: text;
  -webkit-text-fill-color: transparent;
  margin-bottom: 8px;
}

header p {
  color: var(--muted);
  font-size: 0.95rem;
}

/* ---------- Two-column grid, single column on narrow screens ---------- */
.grid {
  display: grid;
  grid-template-columns: 1fr 1fr;
  gap: 20px;
}

@media (max-width: 700px) {
  .grid {
    grid-template-columns: 1fr;
  }
}

/* ---------- Generic card ---------- */
.card {
  background: var(--card);
  border: 1px solid var(--card-border);
  border-radius: 16px;
  padding: 24px;
  backdrop-filter: blur(8px);
}

.card h2 {
  font-size: 0.8rem;
  font-weight: 600;
  letter-spacing: 0.08em;
  text-transform: uppercase;
  color: var(--muted);
  margin-bottom: 20px;
}

/* ---------- Slider caption row (name on the left, current value on the right) ---------- */
.slider-group {
  margin-bottom: 22px;
}

.slider-header {
  display: flex;
  justify-content: space-between;
  align-items: baseline;
  margin-bottom: 8px;
}

.slider-label {
  font-size: 0.88rem;
  color: #94a3b8;
}

.slider-value {
  font-size: 1rem;
  font-weight: 700;
  color: #c4b5fd;
  font-variant-numeric: tabular-nums;
  transition: color 0.2s;
}
/* ---------- Range slider: custom track and thumb ---------- */
input[type=range] {
  -webkit-appearance: none;
  appearance: none;
  width: 100%;
  height: 6px;
  border-radius: 3px;
  background: rgba(255,255,255,0.1);
  outline: none;
  cursor: pointer;
}

/* The default outline is removed above, so keyboard users need an explicit
   focus indicator; :focus-visible keeps it from showing on mouse drags. */
input[type=range]:focus-visible {
  outline: 2px solid var(--accent2);
  outline-offset: 4px;
}

/* WebKit / Blink thumb */
input[type=range]::-webkit-slider-thumb {
  -webkit-appearance: none;
  appearance: none;
  width: 18px;
  height: 18px;
  border-radius: 50%;
  background: linear-gradient(135deg, #6366f1, #a78bfa);
  cursor: pointer;
  transition: transform 0.15s, box-shadow 0.15s;
  box-shadow: 0 0 0 3px rgba(99,102,241,0.25);
}

input[type=range]::-webkit-slider-thumb:hover {
  transform: scale(1.2);
  box-shadow: 0 0 0 5px rgba(99,102,241,0.35);
}

/* Firefox thumb */
input[type=range]::-moz-range-thumb {
  width: 18px;
  height: 18px;
  border-radius: 50%;
  border: none;
  background: linear-gradient(135deg, #6366f1, #a78bfa);
  cursor: pointer;
}
/* ---------- Full-width result card ---------- */
.result-card {
  background: linear-gradient(135deg, rgba(99,102,241,0.12), rgba(167,139,250,0.08));
  border: 1px solid rgba(99,102,241,0.3);
  border-radius: 16px;
  padding: 28px;
  text-align: center;
  grid-column: 1 / -1;
  position: relative;
  overflow: hidden;
}

/* Decorative glow in the top-right corner; never intercepts pointer events */
.result-card::before {
  content: '';
  position: absolute;
  top: -60px;
  right: -60px;
  width: 200px;
  height: 200px;
  border-radius: 50%;
  background: radial-gradient(circle, rgba(99,102,241,0.15), transparent 70%);
  pointer-events: none;
}

/* ---------- Headline total ---------- */
.vram-total {
  font-size: 4rem;
  font-weight: 800;
  letter-spacing: -2px;
  font-variant-numeric: tabular-nums;
  background: linear-gradient(135deg, #818cf8, #c084fc);
  -webkit-background-clip: text;
  -webkit-text-fill-color: transparent;
  line-height: 1;
  margin: 8px 0 4px;
  transition: all 0.3s;
}

.vram-unit {
  font-size: 1.3rem;
  font-weight: 500;
  color: var(--muted);
}

.vram-label {
  font-size: 0.8rem;
  letter-spacing: 0.1em;
  text-transform: uppercase;
  color: var(--muted);
}

/* ---------- Breakdown rows: dot, name, bar, size ---------- */
.breakdown {
  margin-top: 24px;
}

.breakdown-row {
  display: flex;
  align-items: center;
  gap: 10px;
  margin-bottom: 10px;
}

.breakdown-dot {
  width: 10px;
  height: 10px;
  border-radius: 50%;
  flex-shrink: 0;
}

.breakdown-name {
  font-size: 0.82rem;
  color: #94a3b8;
  width: 130px;
  flex-shrink: 0;
  text-align: left;
}

.breakdown-bar-wrap {
  flex: 1;
  height: 8px;
  background: rgba(255,255,255,0.06);
  border-radius: 4px;
  overflow: hidden;
}

/* Width is set from script; the transition animates each change */
.breakdown-bar {
  height: 100%;
  border-radius: 4px;
  transition: width 0.4s cubic-bezier(0.4,0,0.2,1);
}
/* Per-component size readout. The value was misspelled "tabular-norms", which
   is not a valid keyword, so the declaration was ignored; "tabular-nums" gives
   fixed-width digits so the text does not jitter while values change. */
.breakdown-gb {
  font-size: 0.82rem;
  font-weight: 600;
  font-variant-numeric: tabular-nums;
  min-width: 70px;
  text-align: right;
  color: var(--text);
}
/* ---------- GPU compatibility tiles ---------- */
.gpu-grid {
  display: grid;
  grid-template-columns: repeat(auto-fill, minmax(140px, 1fr));
  gap: 10px;
  margin-top: 8px;
}

.gpu-tile {
  background: rgba(255,255,255,0.03);
  border: 1px solid var(--card-border);
  border-radius: 10px;
  padding: 10px 12px;
  display: flex;
  flex-direction: column;
  gap: 4px;
  transition: border-color 0.25s, background 0.25s, opacity 0.25s;
}

/* State modifiers assigned from script: fits / tight / nope */
.gpu-tile.fits {
  border-color: rgba(34,211,160,0.4);
  background: rgba(34,211,160,0.06);
}

.gpu-tile.tight {
  border-color: rgba(245,158,11,0.4);
  background: rgba(245,158,11,0.06);
}

.gpu-tile.nope {
  opacity: 0.35;
}

.gpu-name {
  font-size: 0.75rem;
  font-weight: 600;
  color: #cbd5e1;
}

.gpu-mem {
  font-size: 0.72rem;
  color: var(--muted);
}

.gpu-status {
  font-size: 0.68rem;
  font-weight: 700;
  letter-spacing: 0.05em;
  margin-top: 2px;
}

.gpu-tile.fits .gpu-status {
  color: var(--green);
}

.gpu-tile.tight .gpu-status {
  color: var(--yellow);
}

.gpu-tile.nope .gpu-status {
  color: var(--muted);
}

/* ---------- Estimated architecture: four cells, two per row on small screens ---------- */
.arch-grid {
  display: grid;
  grid-template-columns: repeat(4, 1fr);
  gap: 12px;
  margin-top: 4px;
}

@media (max-width: 500px) {
  .arch-grid {
    grid-template-columns: repeat(2, 1fr);
  }
}

.arch-item {
  background: rgba(255,255,255,0.03);
  border: 1px solid var(--card-border);
  border-radius: 10px;
  padding: 10px 14px;
  text-align: center;
}

.arch-val {
  font-size: 1.1rem;
  font-weight: 700;
  color: #818cf8;
}

.arch-key {
  font-size: 0.68rem;
  color: var(--muted);
  text-transform: uppercase;
  letter-spacing: 0.06em;
  margin-top: 2px;
}

/* ---------- Formula / reference notes ---------- */
.formula-note {
  background: rgba(255,255,255,0.02);
  border: 1px solid var(--card-border);
  border-radius: 10px;
  padding: 12px 16px;
  margin-top: 8px;
  font-size: 0.78rem;
  color: var(--muted);
  line-height: 1.6;
}

.formula-note code {
  background: rgba(255,255,255,0.07);
  padding: 1px 5px;
  border-radius: 4px;
  font-family: 'JetBrains Mono', 'Fira Code', monospace;
  font-size: 0.85em;
  color: #c4b5fd;
}

/* ---------- Tick captions under each slider ---------- */
.tick-row {
  display: flex;
  justify-content: space-between;
  margin-top: 4px;
}

.tick {
  font-size: 0.65rem;
  color: var(--muted);
}

/* ---------- "Pop" pulse applied to the total when it changes ---------- */
@keyframes pop {
  0% {
    transform: scale(1);
  }
  50% {
    transform: scale(1.05);
  }
  100% {
    transform: scale(1);
  }
}

.pop {
  animation: pop 0.25s ease-out;
}
</style>
</head>
<body>
<div class="page">
<!-- Page heading: tool title and a one-line description -->
<header>
<h1>LLM 显存计算器</h1>
<p>估算大语言模型推理所需的 GPU 显存</p>
</header>
<div class="grid">
<!-- Left column: input controls.
     Each caption is a <label> bound to its slider through for/id so the range
     input has an accessible name and clicking the caption focuses the slider. -->
<div class="card">
  <h2>模型参数</h2>
  <!-- Model size (steps extended with 20B and 27B) -->
  <div class="slider-group">
    <div class="slider-header">
      <label class="slider-label" for="sl-model">模型大小</label>
      <span class="slider-value" id="val-model">7 B</span>
    </div>
    <input type="range" id="sl-model" min="0" max="15" step="1" value="4">
    <div class="tick-row">
      <span class="tick">0.5B</span><span class="tick">7B</span>
      <span class="tick">70B</span><span class="tick">671B</span>
    </div>
  </div>
  <!-- Weight quantization -->
  <div class="slider-group">
    <div class="slider-header">
      <label class="slider-label" for="sl-quant">权重量化</label>
      <span class="slider-value" id="val-quant">FP16 (16-bit)</span>
    </div>
    <input type="range" id="sl-quant" min="0" max="7" step="1" value="6">
    <div class="tick-row">
      <span class="tick">2-bit</span><span class="tick">Q4</span>
      <span class="tick">Q8</span><span class="tick">FP32</span>
    </div>
  </div>
  <!-- Context length -->
  <div class="slider-group">
    <div class="slider-header">
      <label class="slider-label" for="sl-ctx">上下文长度</label>
      <span class="slider-value" id="val-ctx">4 K tokens</span>
    </div>
    <input type="range" id="sl-ctx" min="0" max="11" step="1" value="3">
    <div class="tick-row">
      <span class="tick">512</span><span class="tick">4K</span>
      <span class="tick">128K</span><span class="tick">1M</span>
    </div>
  </div>
  <!-- KV cache quantization -->
  <div class="slider-group">
    <div class="slider-header">
      <label class="slider-label" for="sl-kv">KV Cache 量化</label>
      <span class="slider-value" id="val-kv">FP16 (16-bit)</span>
    </div>
    <input type="range" id="sl-kv" min="0" max="2" step="1" value="2">
    <div class="tick-row">
      <span class="tick">4-bit</span><span class="tick">8-bit</span>
      <span class="tick">FP16</span>
    </div>
  </div>
  <!-- Batch size -->
  <div class="slider-group" style="margin-bottom:0">
    <div class="slider-header">
      <label class="slider-label" for="sl-batch">批量大小</label>
      <span class="slider-value" id="val-batch">1</span>
    </div>
    <input type="range" id="sl-batch" min="0" max="5" step="1" value="0">
    <div class="tick-row">
      <span class="tick">1</span><span class="tick">4</span>
      <span class="tick">16</span><span class="tick">32</span>
    </div>
  </div>
</div>
<!-- Right column: estimated architecture, memory formula, bytes-per-parameter table -->
<div class="card">
  <h2>估算架构</h2>
  <!-- The four values below are placeholders that the script overwrites on load -->
  <div class="arch-grid">
    <div class="arch-item">
      <div class="arch-val" id="arch-layers">32</div>
      <div class="arch-key">层数</div>
    </div>
    <div class="arch-item">
      <div class="arch-val" id="arch-hidden">4096</div>
      <div class="arch-key">隐藏维度</div>
    </div>
    <div class="arch-item">
      <div class="arch-val" id="arch-kv-heads">8</div>
      <div class="arch-key">KV 头数</div>
    </div>
    <div class="arch-item">
      <div class="arch-val" id="arch-head-dim">128</div>
      <div class="arch-key">头维度</div>
    </div>
  </div>
  <!-- Memory formula -->
  <div style="margin-top:20px">
    <h2>显存公式</h2>
    <div class="formula-note">
      <b style="color:#c4b5fd">模型权重</b> = 参数量 × 每参数字节数<br>
      <b style="color:#34d399">KV Cache</b> = 长度 × 批量 × 2 × 层数 × kv_heads × head_dim × kv字节数<br>
      <b style="color:#f59e0b">运行时开销</b> = (权重 + KV cache) × 10%<br><br>
      <code>总显存 = 权重 + KV Cache + 开销</code>
    </div>
  </div>
  <!-- Bytes per parameter for each quantization level -->
  <div style="margin-top:20px">
    <h2>量化字节/参数量</h2>
    <div class="formula-note">
      2-bit <code>0.25 B</code> · 3-bit <code>0.375 B</code> · 4-bit <code>0.5 B</code> ·
      5-bit <code>0.625 B</code> · 6-bit <code>0.75 B</code> · 8-bit / Q8 <code>1 B</code> ·
      FP16 / BF16 <code>2 B</code> · FP32 <code>4 B</code>
    </div>
  </div>
</div>
<!-- Full width: total VRAM and its three components (text and bar widths are filled in by the script) -->
<div class="result-card">
  <div class="vram-label">所需显存</div>
  <div class="vram-total" id="vram-total">0.00</div>
  <div class="vram-unit">GB</div>
  <div class="breakdown" id="breakdown">
    <!-- Model weights -->
    <div class="breakdown-row">
      <div class="breakdown-dot" style="background:#818cf8"></div>
      <div class="breakdown-name">模型权重</div>
      <div class="breakdown-bar-wrap">
        <div class="breakdown-bar" id="bar-weights" style="background:#818cf8; width:0%"></div>
      </div>
      <div class="breakdown-gb" id="gb-weights">0.00 GB</div>
    </div>
    <!-- KV cache -->
    <div class="breakdown-row">
      <div class="breakdown-dot" style="background:#34d399"></div>
      <div class="breakdown-name">KV Cache</div>
      <div class="breakdown-bar-wrap">
        <div class="breakdown-bar" id="bar-kv" style="background:#34d399; width:0%"></div>
      </div>
      <div class="breakdown-gb" id="gb-kv">0.00 GB</div>
    </div>
    <!-- Runtime overhead -->
    <div class="breakdown-row">
      <div class="breakdown-dot" style="background:#f59e0b"></div>
      <div class="breakdown-name">运行时开销</div>
      <div class="breakdown-bar-wrap">
        <div class="breakdown-bar" id="bar-oh" style="background:#f59e0b; width:0%"></div>
      </div>
      <div class="breakdown-gb" id="gb-oh">0.00 GB</div>
    </div>
  </div>
</div>
<!-- Full width: GPU compatibility; the tiles inside #gpu-grid are generated by the script -->
<div class="card" style="grid-column: 1 / -1">
  <h2>GPU 兼容性</h2>
  <div class="gpu-grid" id="gpu-grid"></div>
</div>
</div>
</div>
<script>
// ---------- Model size steps (the 20B and 27B tiers were added later) ----------
// One entry per position of the model slider: `b` is the size in billions of
// parameters and `label` is that number followed by "B".
const MODEL_STEPS = [0.5, 1, 1.5, 3, 7, 8, 13, 20, 27, 30, 34, 70, 72, 120, 405, 671]
  .map(b => ({ label: `${b}B`, b }));
// Weight quantization steps: display label and bytes per weight (`bpw`),
// one entry per position of the quantization slider.
const QUANT_STEPS = [
  ['2-bit', 0.25],
  ['3-bit', 0.375],
  ['Q4 (4-bit)', 0.5],
  ['Q5 (5-bit)', 0.625],
  ['Q6 (6-bit)', 0.75],
  ['Q8 (8-bit)', 1.0],
  ['FP16 (16-bit)', 2.0],
  ['FP32 (32-bit)', 4.0]
].map(([label, bpw]) => ({ label, bpw }));
// Context-length steps: every step doubles the previous one, starting at 512
// tokens, so the token count `v` for position i is 512 * 2^i.
const CTX_STEPS = ['512', '1K', '2K', '4K', '8K', '16K', '32K', '64K', '128K', '256K', '512K', '1M']
  .map((label, i) => ({ label, v: 512 * 2 ** i }));
// KV-cache quantization steps: display label and bytes per cached value (`bpw`).
const KV_STEPS = [
  ['4-bit', 0.5],
  ['Q8 (8-bit)', 1.0],
  ['FP16 (16-bit)', 2.0]
].map(([label, bpw]) => ({ label, bpw }));
// Batch sizes offered by the batch slider: the powers of two from 1 to 32.
const BATCH_STEPS = Array.from({ length: 6 }, (_, i) => 2 ** i);
// GPUs (and multi-GPU setups) shown in the compatibility grid, each with the
// memory capacity in GB that the required total is compared against.
const GPUS = [
  ['RTX 3060', 12],
  ['RTX 3090', 24],
  ['RTX 4070', 12],
  ['RTX 4090', 24],
  ['RTX 5090', 32],
  ['A10', 24],
  ['A100 40G', 40],
  ['A100 80G', 80],
  ['H100 80G', 80],
  ['H100 NVL', 94],
  ['H200', 141],
  ['B200', 192],
  ['2× H100', 160],
  ['4× H100', 320],
  ['8× H100', 640],
  ['8× B200', 1536]
].map(([name, vram]) => ({ name, vram }));
/**
 * Estimate a transformer layout from the parameter count alone
 * (kept identical to the first version of this tool).
 *
 * @param {number} b - Model size in billions of parameters.
 * @returns {{layers: number, hidden: number, kvHeads: number, headDim: number}}
 *   Estimated layer count, hidden size, KV head count and per-head dimension.
 */
function estimateArch(b) {
  // Depth grows as a power of the size and never drops below 8 layers.
  const depth = Math.round(14 * Math.pow(b, 0.30));
  // Width grows as a power of the size and is snapped to a multiple of 64.
  const widthIn64s = Math.round(2048 * Math.pow(b, 0.285) / 64);
  return {
    layers: Math.max(8, depth),
    hidden: widthIn64s * 64,
    // Models of 200B and above are given 16 KV heads, everything else 8.
    kvHeads: b >= 200 ? 16 : 8,
    headDim: 128
  };
}
/**
 * Core calculation: read the five sliders and estimate the memory needed.
 *
 * Sizes are bytes divided by 1024^3; the page labels them "GB".
 *
 * @returns {{weightsGB: number, kvGB: number, overheadGB: number, totalGB: number, arch: object}}
 *   Weight, KV-cache and overhead sizes, their sum, and the estimated architecture.
 */
function calculate() {
  // Every slider stores an index into its lookup table.
  const pick = (id, table) => table[+document.getElementById(id).value];

  const modelB = pick('sl-model', MODEL_STEPS).b;
  const quant = pick('sl-quant', QUANT_STEPS);
  const ctx = pick('sl-ctx', CTX_STEPS).v;
  const kv = pick('sl-kv', KV_STEPS);
  const batch = pick('sl-batch', BATCH_STEPS);
  const arch = estimateArch(modelB);

  const bytesPerUnit = 1024 ** 3;
  // Weights: parameter count times bytes per weight.
  const weightsGB = modelB * 1e9 * quant.bpw / bytesPerUnit;
  // KV cache: keys and values (factor 2) for every layer, KV head, head dimension, token and sequence.
  const kvGB = 2 * arch.layers * arch.kvHeads * arch.headDim * ctx * batch * kv.bpw / bytesPerUnit;
  // Runtime overhead is modelled as a flat 10% of weights plus KV cache.
  const overheadGB = (weightsGB + kvGB) * 0.10;
  const totalGB = weightsGB + kvGB + overheadGB;

  return { weightsGB, kvGB, overheadGB, totalGB, arch };
}
/**
 * Format a size for display with roughly three significant digits:
 * two decimals below 10, one decimal below 100, otherwise a whole number.
 *
 * The tier is chosen from the value as it will be printed, not from the raw
 * input. Previously 9.999 printed as "10.00" and 99.99 as "100.0", one digit
 * more than 10 ("10.0") and 100 ("100") get.
 *
 * @param {number} n - Value to format.
 * @returns {string} The formatted number, without a unit.
 */
function fmt(n) {
  const twoDecimals = n.toFixed(2);
  if (Number(twoDecimals) < 10) return twoDecimals;
  const oneDecimal = n.toFixed(1);
  if (Number(oneDecimal) < 100) return oneDecimal;
  return Math.round(n).toString();
}
/**
 * Show the currently selected value next to each slider.
 *
 * The same text is also written to the slider's `aria-valuetext`, because the
 * range inputs only hold a step index (for example 4) and would otherwise be
 * announced by assistive technology as that index instead of "7B".
 */
function renderLabels() {
  // [id suffix, lookup table, how to turn the selected step into display text]
  const rows = [
    ['model', MODEL_STEPS, step => step.label],
    ['quant', QUANT_STEPS, step => step.label],
    ['ctx', CTX_STEPS, step => step.label + ' tokens'],
    ['kv', KV_STEPS, step => step.label],
    ['batch', BATCH_STEPS, step => String(step)]
  ];
  for (const [key, steps, describe] of rows) {
    const slider = document.getElementById('sl-' + key);
    const text = describe(steps[+slider.value]);
    document.getElementById('val-' + key).textContent = text;
    slider.setAttribute('aria-valuetext', text);
  }
}
/**
 * Write the estimated architecture into the four cells of the architecture grid.
 *
 * @param {{layers: number, hidden: number, kvHeads: number, headDim: number}} arch
 *   Architecture estimate as returned by estimateArch.
 */
function renderArch(arch) {
  const put = (id, value) => {
    document.getElementById(id).textContent = value;
  };
  put('arch-layers', arch.layers);
  // Only the hidden size is locale-formatted.
  put('arch-hidden', arch.hidden.toLocaleString());
  put('arch-kv-heads', arch.kvHeads);
  put('arch-head-dim', arch.headDim);
}
/**
 * Update the headline total and the three breakdown rows (text and bar width).
 *
 * @param {{weightsGB: number, kvGB: number, overheadGB: number, totalGB: number}} result
 *   Sizes as returned by calculate.
 */
function renderResult({ weightsGB, kvGB, overheadGB, totalGB }) {
  const byId = id => document.getElementById(id);

  const headline = byId('vram-total');
  headline.textContent = fmt(totalGB);
  // Remove the class, read a layout property, then add it back so the "pop"
  // animation plays again on every update.
  headline.classList.remove('pop');
  void headline.offsetWidth;
  headline.classList.add('pop');

  const parts = [
    ['weights', weightsGB],
    ['kv', kvGB],
    ['oh', overheadGB]
  ];
  parts.forEach(([key, gb]) => {
    byId('gb-' + key).textContent = fmt(gb) + ' GB';
  });

  // Bars are scaled against the largest component; the 0.01 floor avoids dividing by zero.
  const scale = Math.max(weightsGB, kvGB, overheadGB, 0.01);
  parts.forEach(([key, gb]) => {
    byId('bar-' + key).style.width = (gb / scale * 100) + '%';
  });
}
/**
 * Rebuild the GPU compatibility grid for the given memory requirement.
 *
 * A GPU "fits" when the requirement is at most 85% of its memory, is "tight"
 * up to 100%, and is marked insufficient beyond that. The percentage shown on
 * each tile is capped at 100.
 *
 * @param {number} totalGB - Required memory, in the same unit as GPUS[].vram.
 */
function renderGPUs(totalGB) {
  const grid = document.getElementById('gpu-grid');
  grid.innerHTML = '';

  for (const gpu of GPUS) {
    const ratio = totalGB / gpu.vram;

    // Start from "insufficient" and upgrade when the requirement fits.
    let cls = 'nope';
    let status = '✗ 不足';
    if (ratio <= 0.85) {
      cls = 'fits';
      status = '✓ 够用';
    } else if (ratio <= 1.0) {
      cls = 'tight';
      status = '⚠ 紧张';
    }

    const pct = Math.min(100, Math.round(ratio * 100));

    const tile = document.createElement('div');
    tile.className = 'gpu-tile ' + cls;
    tile.innerHTML = [
      '',
      `<div class="gpu-name">${gpu.name}</div>`,
      `<div class="gpu-mem">${gpu.vram} GB</div>`,
      `<div class="gpu-status">${status} (${pct}%)</div>`,
      ''
    ].join('\n');
    grid.appendChild(tile);
  }
}
/**
 * Refresh the whole page from the current slider positions: slider captions,
 * architecture cells, result card and GPU grid, in that order.
 */
function update() {
  renderLabels();
  const estimate = calculate();
  const { arch, totalGB } = estimate;
  renderArch(arch);
  renderResult(estimate);
  renderGPUs(totalGB);
}
// Recompute on every slider movement, then render once for the initial state.
for (const id of ['sl-model', 'sl-quant', 'sl-ctx', 'sl-kv', 'sl-batch']) {
  document.getElementById(id).addEventListener('input', update);
}
update();
</script>
</body>
</html>