{
  "title": "AIPM LLM Take-away Knowledge Database",
  "generated_on": "2026-06-23",
  "audience": "AI product managers and product-minded builders who need LLM principles, not research depth.",
  "sources": [
    {
      "id": "cs336",
      "title": "Stanford CS336 Spring 2025",
      "url": "https://cs336.stanford.edu/spring2025/",
      "role": "Course backbone: from tokenization to systems, data, scaling, evaluation, and alignment."
    },
    {
      "id": "cs336-lectures",
      "title": "stanford-cs336/spring2025-lectures",
      "url": "https://github.com/stanford-cs336/spring2025-lectures",
      "role": "Executable lectures and slide archive."
    },
    {
      "id": "alisa-book",
      "title": "Alisa's book of LLMs",
      "url": "https://alisawuffles.notion.site/alisa-s-book-of-llms",
      "role": "Structured Notion learning notes that build LLM intuition from neural nets, activations, gradients, and implementation details."
    },
    {
      "id": "alisa-job-search",
      "title": "Alisa Liu: Notes on the Industry Job Search",
      "url": "https://alisawuffles.github.io/blog/job-search/",
      "role": "Industry job-search field report connecting CS336, LLM notes, ML coding, technical discussions, and breadth-first preparation."
    },
    {
      "id": "modern-software",
      "title": "CS146S: The Modern Software Developer",
      "url": "https://themodernsoftware.dev/",
      "role": "Stanford course on AI-assisted software development, coding agents, MCP, AI IDEs, testing, security, review, and observability."
    },
    {
      "id": "ai-for-everyone",
      "title": "DeepLearning.AI: AI for Everyone",
      "url": "https://www.deeplearning.ai/courses/ai-for-everyone/",
      "role": "Andrew Ng's non-technical AI course for AI terminology, ML project workflow, AI strategy, and organizational adoption."
    },
    {
      "id": "pair-guidebook",
      "title": "Google People + AI Guidebook",
      "url": "https://pair.withgoogle.com/guidebook/",
      "role": "Product-design guide for human-centered AI, trust, control, failure recovery, feedback, and user mental models."
    },
    {
      "id": "openai-prompt",
      "title": "OpenAI Prompt Engineering Guide",
      "url": "https://platform.openai.com/docs/guides/prompt-engineering",
      "role": "Official guidance for task clarity, examples, structured outputs, constraints, and prompt iteration."
    },
    {
      "id": "anthropic-prompt",
      "title": "Anthropic Prompt Engineering Overview",
      "url": "https://docs.anthropic.com/en/docs/build-with-claude/prompt-engineering/overview",
      "role": "Prompting guidance that emphasizes defining success criteria and test methods before prompt iteration."
    },
    {
      "id": "anthropic-tool-use",
      "title": "Anthropic Tool Use",
      "url": "https://docs.anthropic.com/en/docs/agents-and-tools/tool-use/overview",
      "role": "Official explanation of tool use, client/server tools, tool execution loops, trigger boundaries, and tool-related cost."
    },
    {
      "id": "openai-agents-tools",
      "title": "OpenAI API: tools, function calling, structured outputs, Agents SDK",
      "url": "https://platform.openai.com/docs",
      "role": "Official OpenAI docs for tools, structured output, function calling, Agents SDK, guardrails, and agent workflow evaluation."
    },
    {
      "id": "openai-evals",
      "title": "OpenAI Evals Guide",
      "url": "https://platform.openai.com/docs/guides/evals",
      "role": "Official evals guide for measuring model and application behavior with datasets, graders, and regression workflows."
    },
    {
      "id": "reforge-blog",
      "title": "Reforge Blog",
      "url": "https://www.reforge.com/blog",
      "role": "Product and growth writing including AI product-team capabilities, experimentation, pricing, and evaluation practices."
    },
    {
      "id": "nist-ai-rmf",
      "title": "NIST AI Risk Management Framework",
      "url": "https://www.nist.gov/itl/ai-risk-management-framework",
      "role": "Enterprise AI risk-management framework for mapping, measuring, managing, and governing AI risk."
    },
    {
      "id": "microsoft-responsible-ai",
      "title": "Microsoft Responsible AI",
      "url": "https://www.microsoft.com/en-us/ai/responsible-ai",
      "role": "Responsible AI principles and practices covering fairness, reliability, safety, privacy, security, transparency, and accountability."
    },
    {
      "id": "google-ai-principles",
      "title": "Google AI Principles",
      "url": "https://ai.google/responsibility/responsible-ai-practices/",
      "role": "Responsible AI principles and practices across model design, testing, deployment, monitoring, safety, privacy, and user benefit."
    },
    {
      "id": "lenny",
      "title": "Lenny's Newsletter",
      "url": "https://www.lennysnewsletter.com/",
      "role": "Long-running product newsletter for PM craft, growth, career, and product strategy."
    },
    {
      "id": "svpg",
      "title": "Silicon Valley Product Group Articles",
      "url": "https://www.svpg.com/articles/",
      "role": "Product-discovery and product-operating-model essays from SVPG / Marty Cagan's product craft tradition."
    },
    {
      "id": "intercom-blog",
      "title": "Intercom Blog",
      "url": "https://www.intercom.com/blog/",
      "role": "AI customer-support, AI agent, automation, conversation design, pricing, and B2B SaaS product implementation writing."
    },
    {
      "id": "bi-break-into-ai",
      "title": "Business Insider: how people broke into AI",
      "url": "https://www.businessinsider.com/tech-workers-simple-tip-for-breaking-into-ai-2025-12",
      "role": "Career stories emphasizing hands-on proof-of-work, real AI product building, and project-based learning."
    },
    {
      "id": "bi-salesforce-pm",
      "title": "Business Insider: product manager landed Salesforce AI role",
      "url": "https://www.businessinsider.com/job-search-advice-strategies-tech-interviews-uber-meta-amazon-salesforce-2025-10",
      "role": "Interview story covering AI PM case questions, AI feature metrics, recruiter outreach, and multi-round product interviews."
    },
    {
      "id": "bi-tmobile-ai-pm",
      "title": "Business Insider: agentic AI PM at T-Mobile",
      "url": "https://www.businessinsider.com/f1-visa-holder-lands-tmobile-tech-job-layoff-deadline-2026-1",
      "role": "AI PM job-search story emphasizing hiring-manager outreach, mock interviews, communication practice, and agentic AI fit."
    },
    {
      "id": "bi-agentforce",
      "title": "Business Insider: inside Salesforce's Agentforce bet",
      "url": "https://www.businessinsider.com/inside-salesforce-struggles-agentforce-flagship-ai-agent-wars-benioff-2025-11",
      "role": "Enterprise AI agent adoption story highlighting implementation complexity, ROI ambiguity, sales enablement, and demo-to-production gaps."
    },
    {
      "id": "wdndev",
      "title": "wdndev/llm_interview_note",
      "url": "https://github.com/wdndev/llm_interview_note",
      "role": "Chinese LLM interview and implementation knowledge map."
    },
    {
      "id": "luhengshiwo",
      "title": "luhengshiwo/LLMForEverybody",
      "url": "https://github.com/luhengshiwo/LLMForEverybody",
      "role": "Chinese beginner-friendly LLM knowledge and interview preparation."
    },
    {
      "id": "km1994",
      "title": "km1994/LLMs_interview_notes",
      "url": "https://github.com/km1994/LLMs_interview_notes",
      "role": "Chinese LLM algorithm interview notes."
    },
    {
      "id": "faq",
      "title": "aceliuchanghong/FAQ_Of_LLM_Interview",
      "url": "https://github.com/aceliuchanghong/FAQ_Of_LLM_Interview",
      "role": "Chinese FAQ-style LLM interview concepts."
    },
    {
      "id": "ai-eng-interview",
      "title": "amitshekhariitbhu/ai-engineering-interview-questions",
      "url": "https://github.com/amitshekhariitbhu/ai-engineering-interview-questions",
      "role": "AI engineering interview questions covering LLM, RAG, agents, fine-tuning, embeddings, evaluation."
    },
    {
      "id": "mlabonne",
      "title": "mlabonne/llm-course",
      "url": "https://github.com/mlabonne/llm-course",
      "role": "LLM scientist and engineer roadmap with RAG, fine-tuning, quantization, deployment, and security."
    },
    {
      "id": "rasbt",
      "title": "rasbt/LLMs-from-scratch",
      "url": "https://github.com/rasbt/LLMs-from-scratch",
      "role": "Step-by-step LLM implementation reference."
    },
    {
      "id": "attention",
      "title": "Attention Is All You Need",
      "url": "https://arxiv.org/abs/1706.03762",
      "role": "Transformer architecture foundation."
    },
    {
      "id": "rag-paper",
      "title": "Retrieval-Augmented Generation",
      "url": "https://arxiv.org/abs/2005.11401",
      "role": "Classic RAG formulation."
    },
    {
      "id": "instructgpt",
      "title": "Training language models to follow instructions with human feedback",
      "url": "https://arxiv.org/abs/2203.02155",
      "role": "Instruction tuning and RLHF reference."
    },
    {
      "id": "chinchilla",
      "title": "Training Compute-Optimal Large Language Models",
      "url": "https://arxiv.org/abs/2203.15556",
      "role": "Scaling-law and compute-optimal training reference."
    },
    {
      "id": "lora",
      "title": "LoRA",
      "url": "https://arxiv.org/abs/2106.09685",
      "role": "Parameter-efficient fine-tuning reference."
    },
    {
      "id": "qlora",
      "title": "QLoRA",
      "url": "https://arxiv.org/abs/2305.14314",
      "role": "Memory-efficient 4-bit fine-tuning reference."
    },
    {
      "id": "dpo",
      "title": "Direct Preference Optimization",
      "url": "https://arxiv.org/abs/2305.18290",
      "role": "Preference alignment without explicit reward-model RL loop."
    },
    {
      "id": "flashattention",
      "title": "FlashAttention",
      "url": "https://arxiv.org/abs/2205.14135",
      "role": "Attention IO optimization reference."
    },
    {
      "id": "vllm",
      "title": "vLLM / PagedAttention",
      "url": "https://arxiv.org/abs/2309.06180",
      "role": "Serving throughput and KV-cache paging reference."
    },
    {
      "id": "helm",
      "title": "HELM",
      "url": "https://crfm.stanford.edu/helm/latest/",
      "role": "Evaluation philosophy and benchmark landscape."
    },
    {
      "id": "arena",
      "title": "Chatbot Arena / LMSYS Arena",
      "url": "https://lmarena.ai/",
      "role": "Human preference arena for model comparison."
    }
  ],
  "capability_map": [
    {
      "title": "1. AI 基础认知",
      "focus": "AI 能做什么、不能做什么；数据、模型、任务、评估和组织落地之间的关系。",
      "questions": [
        "这个问题是否真的适合 AI？",
        "失败通常来自数据、模型、流程还是组织？"
      ],
      "source_ids": [
        "ai-for-everyone",
        "cs336"
      ]
    },
    {
      "title": "2. AI 产品设计",
      "focus": "什么时候该用 AI；失败恢复、信任、解释、控制权、确认和人机协作。",
      "questions": [
        "用户如何发现并纠正 AI 错误？",
        "哪些动作必须 human-in-the-loop？"
      ],
      "source_ids": [
        "pair-guidebook",
        "google-ai-principles",
        "intercom-blog"
      ]
    },
    {
      "title": "3. Prompt / LLM 产品能力",
      "focus": "任务定义、输入输出结构、few-shot、约束、失败样例、结构化输出和 prompt baseline。",
      "questions": [
        "成功标准是否先于 prompt 定义？",
        "什么时候 prompt 不够，需要 RAG、工具或评测？"
      ],
      "source_ids": [
        "openai-prompt",
        "anthropic-prompt"
      ]
    },
    {
      "title": "4. Agent / 工具调用",
      "focus": "工具调用边界、权限、日志、失败重试、回滚、确认、成本和 agent workflow。",
      "questions": [
        "模型什么时候该调用工具？",
        "工具失败或越权时产品如何兜底？"
      ],
      "source_ids": [
        "anthropic-tool-use",
        "openai-agents-tools",
        "modern-software"
      ]
    },
    {
      "title": "5. AI 产品评估 / Evals",
      "focus": "任务级评测、黄金集、失败分级、幻觉率、人工审核、灰度和反馈闭环。",
      "questions": [
        "这个 AI 功能怎么算好？",
        "哪些失败不能上线？"
      ],
      "source_ids": [
        "openai-evals",
        "reforge-blog",
        "helm"
      ]
    },
    {
      "title": "6. 负责任 AI / 风险治理",
      "focus": "公平、可靠安全、隐私安全、透明、问责、人类控制和企业风险管理。",
      "questions": [
        "高风险输出如何审核和追责？",
        "风险治理是否进入需求、设计、评测和上线流程？"
      ],
      "source_ids": [
        "nist-ai-rmf",
        "microsoft-responsible-ai",
        "google-ai-principles"
      ]
    }
  ],
  "cards": [
    {
      "id": "mental-model",
      "title": "LLM 产品栈：不是一个模型，而是一条系统链路",
      "layer": "overview",
      "pm_takeaway": "AIPM 不需要把每个 kernel 写出来，但必须知道问题发生在哪一层：模型能力、上下文、检索、工具、后训练、推理服务、评测与安全是不同层，不要用一个方案解决所有问题。",
      "technical_core": "从输入到输出可以拆成：tokenization -> embeddings -> Transformer inference -> decoding -> optional retrieval/tools -> guardrails -> evaluation/monitoring。每层都有独立约束和成本。",
      "product_questions": [
        "这个需求是模型本身不会，还是上下文没给够？",
        "需要 RAG、fine-tuning、agent，还是只是 prompt/template？",
        "主要瓶颈是准确率、延迟、成本、可解释性还是合规？"
      ],
      "common_traps": [
        "把所有问题都叫 hallucination。",
        "把模型 benchmark 分数当成产品体验。",
        "上线前没有定义任务级成功指标。"
      ],
      "source_ids": [
        "cs336",
        "mlabonne",
        "ai-eng-interview"
      ],
      "tags": [
        "AIPM",
        "architecture",
        "product"
      ]
    },
    {
      "id": "learning-depth",
      "title": "AIPM 学习深度：懂到能问对问题，而不是背完公式",
      "layer": "learning path",
      "pm_takeaway": "AIPM 最有效的学习路线是先建立可沟通的底层直觉：神经网络如何从输入变输出，loss 如何驱动参数更新，Transformer 为什么能看上下文，再进入 RAG、微调、评测和系统成本。",
      "technical_core": "MLP、activation、gradient、matrix shape、tokenization、attention 和 decoding 是理解 LLM 行为的基础积木。PM 不必手推所有反向传播，但要知道这些概念分别对应能力、训练、推理和成本中的哪类约束。",
      "product_questions": [
        "我现在是在补数学直觉、模型结构、训练方法，还是应用系统？",
        "这个概念能帮助我做产品决策吗？",
        "我是否能把它翻译成需求、指标、风险或工程问题？"
      ],
      "common_traps": [
        "从 RAG/agent 直接跳到调参，缺少模型行为直觉。",
        "沉迷公式细节却无法连接到产品判断。",
        "把“懂原理”等同于能训练大模型。"
      ],
      "source_ids": [
        "alisa-book",
        "cs336",
        "mlabonne",
        "rasbt"
      ],
      "tags": [
        "learning",
        "AIPM",
        "foundations"
      ]
    },
    {
      "id": "aipm-capability-roadmap",
      "title": "AI PM 能力路线图：先补 6 类能力，而不是刷 AI 新闻",
      "layer": "learning path",
      "pm_takeaway": "AI PM 最该补的不是碎片新闻，而是六类可迁移能力：AI 基础认知、AI 产品设计、Prompt/LLM 产品能力、Agent/工具调用、AI 产品评估、负责任 AI/风险治理。产品基本功仍然是底座。",
      "technical_core": "这条路线把学习目标从“知道很多工具”改成“能做产品判断”：AI 是否适合、用户失败时怎么办、prompt 何时不够、agent 如何授权、eval 如何定义、风险如何治理、商业化如何闭环。",
      "product_questions": [
        "我现在补的是能力，还是只是在收藏资料？",
        "这个资源能帮我回答哪类产品决策问题？",
        "我是否能把学习结果沉淀成 checklist、case answer 或 PRD 模板？"
      ],
      "common_traps": [
        "泛泛追 AI 新闻，没有形成可复用判断框架。",
        "只学模型技术，不补产品发现、实验和商业化。",
        "只背 prompt 模板，不定义成功标准和评测。"
      ],
      "source_ids": [
        "ai-for-everyone",
        "pair-guidebook",
        "openai-prompt",
        "anthropic-tool-use",
        "openai-evals",
        "nist-ai-rmf",
        "lenny",
        "svpg"
      ],
      "tags": [
        "learning",
        "AI PM",
        "roadmap",
        "capability"
      ]
    },
    {
      "id": "ai-product-design-trust",
      "title": "AI 产品设计：核心是失败、信任、控制权和人机协作",
      "layer": "product design",
      "pm_takeaway": "AI 功能不是“加一个智能按钮”。AIPM 要设计用户何时信任、何时确认、如何理解模型不确定性、失败后怎么恢复、哪些动作必须 human-in-the-loop。",
      "technical_core": "People + AI 的产品视角可以和 LLM 工程结合：模型输出需要解释、引用、置信边界、用户反馈、撤销/回滚、权限和升级路径。越高风险的场景，越不能把 AI 当黑盒自动化。",
      "product_questions": [
        "AI 失败时用户能发现并修正吗？",
        "用户是否有足够控制权，而不是被模型替他们做决定？",
        "哪些输出需要引用、解释或人工确认？"
      ],
      "common_traps": [
        "只优化自动化率，不设计失败恢复。",
        "用拟人化文案掩盖不确定性。",
        "高风险动作没有确认、撤销和审计。"
      ],
      "source_ids": [
        "pair-guidebook",
        "google-ai-principles",
        "microsoft-responsible-ai",
        "intercom-blog"
      ],
      "tags": [
        "design",
        "trust",
        "human-in-the-loop",
        "responsible-ai"
      ]
    },
    {
      "id": "career-skill-map",
      "title": "LLM 能力图谱：概念、实现、实验设计和表达要同时练",
      "layer": "career & collaboration",
      "pm_takeaway": "无论是转岗、面试，还是和算法/工程团队共创，AIPM 不能只会讲概念。更可靠的能力组合是：知道原理边界，能读懂实验设计，理解从零实现的关键困难，并能把技术取舍讲成产品判断。",
      "technical_core": "真实技术讨论常覆盖 ML coding、Transformer/debugging、decoding、实验设计、位置编码、并行训练、RLHF/GRPO 等宽主题。CS336 这类课程的价值在于把散乱知识组织成一张领域地图。",
      "product_questions": [
        "我能否解释一个方案为什么失败，而不只是说模型不行？",
        "我能否把技术讨论转成指标、实验和下一步？",
        "我是否知道哪些概念需要会实现，哪些只需理解边界？"
      ],
      "common_traps": [
        "只背八股答案，不做实现和调试练习。",
        "只学产品案例，不理解模型/系统约束。",
        "把面试准备和实际工作能力割裂开。"
      ],
      "source_ids": [
        "alisa-job-search",
        "cs336",
        "alisa-book",
        "rasbt"
      ],
      "tags": [
        "career",
        "interview",
        "learning",
        "collaboration"
      ]
    },
    {
      "id": "interview-intel",
      "title": "面经雷达：AI PM 面试反复考“真实 AI 判断力”",
      "layer": "interview intel",
      "pm_takeaway": "公开面经里反复出现的信号很一致：公司想看候选人是否真的做过 AI 产品、能否把用户问题转成轻量 AI 方案、能否设计指标和实验，并能讲清模型能力、数据、风险和上线成本。",
      "technical_core": "高频题型包括 AI product case、feature metric、technical discussion、ML/LLM breadth check、behavioral、stakeholder/leadership、take-home/design exercise。越接近 AI PM/agentic AI 岗，越会追问数据、评测、权限、ROI 和 demo 到 production 的落差。",
      "product_questions": [
        "我有没有可展示的 AI proof-of-work？",
        "一个 AI feature case 里，我能讲清用户、数据、模型方案、评测和风险吗？",
        "我能把“懂 AI”落到真实项目决策，而不是只背术语吗？"
      ],
      "common_traps": [
        "只准备普通 PM case，不准备模型/数据/评测追问。",
        "只讲用了 ChatGPT，不讲用户问题和产品结果。",
        "把 AI demo 当成生产系统，没有 ROI 和风险意识。"
      ],
      "source_ids": [
        "bi-break-into-ai",
        "bi-salesforce-pm",
        "bi-tmobile-ai-pm",
        "alisa-job-search"
      ],
      "tags": [
        "interview",
        "AI PM",
        "case",
        "career"
      ]
    },
    {
      "id": "ai-pm-case-loop",
      "title": "AI PM Case Loop：用一条固定链路回答开放题",
      "layer": "interview intel",
      "pm_takeaway": "面对“设计一个 AI 功能/agent/推荐系统”的面试题，可以用固定链路兜住不确定性：用户痛点 -> 任务定义 -> 数据/权限 -> 模型或 RAG/agent 方案 -> 评测指标 -> 失败模式 -> 上线与成本。",
      "technical_core": "AI case 的核心不是炫模型名，而是展示你知道 AI 产品的不确定性来自哪里：数据分布、召回、模型能力、延迟成本、安全、反馈闭环、组织采纳和 ROI。",
      "product_questions": [
        "这个 case 的 ground truth 从哪里来？",
        "哪些失败会伤害用户或业务？",
        "上线后如何监控质量、成本和人工介入率？"
      ],
      "common_traps": [
        "一上来就选模型。",
        "只讲用户体验，不讲数据和评测。",
        "没有分阶段 rollout 和人工兜底。"
      ],
      "source_ids": [
        "bi-salesforce-pm",
        "bi-agentforce",
        "modern-software",
        "helm"
      ],
      "tags": [
        "case-interview",
        "metrics",
        "AI PM",
        "evaluation"
      ]
    },
    {
      "id": "tokenization",
      "title": "Tokenization：成本、边界和多语言体验的第一层",
      "layer": "model basics",
      "pm_takeaway": "Token 是 LLM 的计费、上下文长度、延迟和多语言表现的基本单位。中文、代码、专业术语可能被切得更碎，产品上会直接影响成本和召回/生成质量。",
      "technical_core": "常见 tokenizer 使用 BPE/byte-level BPE/SentencePiece 等，把文本映射成整数序列。模型并不直接看字符，而是看 token id 和 embedding。",
      "product_questions": [
        "目标语言或行业术语的 token 膨胀率是多少？",
        "长文档进入上下文前是否先做 chunking 和摘要？",
        "用户可见字数与 token 预算之间如何换算？"
      ],
      "common_traps": [
        "用字符数估算成本。",
        "忽视中文、表格、代码、JSON 的 token 膨胀。",
        "把 tokenizer 当成无关实现细节。"
      ],
      "source_ids": [
        "cs336",
        "wdndev",
        "rasbt"
      ],
      "tags": [
        "token",
        "cost",
        "context"
      ]
    },
    {
      "id": "attention-transformer",
      "title": "Attention/Transformer：理解上下文能力与长文本瓶颈",
      "layer": "model basics",
      "pm_takeaway": "Transformer 的核心价值是让 token 之间建立关系；产品上表现为上下文理解、引用、指代消解、多步骤推理。但 attention 也带来长上下文成本和延迟问题。",
      "technical_core": "自注意力用 Q/K/V 计算 token 间相关性，decoder-only 模型用 causal mask 只看过去 token。多头注意力让模型并行关注不同关系。",
      "product_questions": [
        "任务是否真的需要长上下文，还是需要更好的检索/摘要？",
        "模型在长上下文中能否找到 needle，还是只是在窗口里塞东西？",
        "是否需要引用定位来证明答案来源？"
      ],
      "common_traps": [
        "认为上下文窗口越长一定越好。",
        "忽视长上下文下的首 token 延迟和成本。",
        "把 attention 解释成“模型理解一切”。"
      ],
      "source_ids": [
        "attention",
        "cs336",
        "ai-eng-interview"
      ],
      "tags": [
        "attention",
        "transformer",
        "context"
      ]
    },
    {
      "id": "pretraining",
      "title": "Pretraining：通用能力来自数据规模和 next-token objective",
      "layer": "training",
      "pm_takeaway": "预训练解决“模型知道什么、语言能力如何”的底座问题；AIPM 通常不会自己预训练，但要理解为什么数据质量、版权、时效性和领域覆盖会限制模型。",
      "technical_core": "主流 LLM 在大规模文本/代码/多模态数据上学习预测下一个 token。训练损失下降通常带来通用能力提升，但具体产品能力还依赖数据分布与评测。",
      "product_questions": [
        "模型知识截止与业务知识更新怎么处理？",
        "领域数据缺口适合 RAG、SFT 还是继续预训练？",
        "数据版权、PII、污染和重复如何治理？"
      ],
      "common_traps": [
        "把预训练当成产品团队可轻易重做的事。",
        "以为更多数据必然更好。",
        "忽视数据清洗和去重。"
      ],
      "source_ids": [
        "cs336",
        "chinchilla",
        "mlabonne"
      ],
      "tags": [
        "pretraining",
        "data",
        "scaling"
      ]
    },
    {
      "id": "loss-gradient",
      "title": "Loss/Gradient：训练不是记答案，而是优化一个代理目标",
      "layer": "training basics",
      "pm_takeaway": "PM 理解 loss 和 gradient 的关键价值，是知道模型优化的是一个可计算代理目标，不一定等于用户真正想要的业务结果。很多对齐、评测和数据问题，本质上都是“目标定义错了或样本分布错了”。",
      "technical_core": "Loss 衡量预测与目标的差距，gradient 表示参数变化会如何影响 loss，反向传播把误差信号传回各层。大模型训练、SFT、偏好优化和很多 reranker/embedding 训练都依赖这个优化范式。",
      "product_questions": [
        "训练目标和产品成功指标一致吗？",
        "标注数据是否代表真实用户分布？",
        "优化一个指标是否会牺牲事实性、安全性或用户体验？"
      ],
      "common_traps": [
        "以为 loss 降低就等于产品变好。",
        "没有验证集和失败切片，只看总体分数。",
        "把训练问题误判为 prompt 或 UI 问题。"
      ],
      "source_ids": [
        "alisa-book",
        "cs336",
        "instructgpt",
        "dpo"
      ],
      "tags": [
        "loss",
        "gradient",
        "training",
        "evaluation"
      ]
    },
    {
      "id": "post-training",
      "title": "Post-training：让模型从“会续写”变成“会服务用户”",
      "layer": "training",
      "pm_takeaway": "SFT、RLHF、DPO、GRPO 等不是魔法增强智商，而是在改变模型的行为分布：遵循指令、输出风格、安全偏好、任务奖励和拒答边界。",
      "technical_core": "SFT 用示范数据学习回答格式和任务行为；偏好优化用 chosen/rejected 或 reward 信号推动模型更符合人类/任务偏好。",
      "product_questions": [
        "我们要改变的是知识、格式、风格、工具使用，还是某个任务的成功率？",
        "是否有足够高质量示范或偏好数据？",
        "对齐后是否损失专业任务能力？"
      ],
      "common_traps": [
        "用少量低质量 SFT 数据期待大幅提升。",
        "把 RLHF 当成事实性修复方案。",
        "只看安全拒答率，不看有用性。"
      ],
      "source_ids": [
        "instructgpt",
        "dpo",
        "cs336",
        "mlabonne"
      ],
      "tags": [
        "SFT",
        "RLHF",
        "DPO",
        "alignment"
      ]
    },
    {
      "id": "rag",
      "title": "RAG：产品中最常用的“补知识”方案",
      "layer": "application",
      "pm_takeaway": "当问题是企业知识、私有文档、实时信息或可引用事实时，优先考虑 RAG。RAG 的关键不是“接个向量库”，而是文档处理、召回、重排、引用、评测和权限。",
      "technical_core": "RAG 在生成前检索相关 chunks，把外部上下文放进 prompt。效果取决于 chunking、embedding、retriever、reranker、query rewriting、上下文压缩和答案约束。",
      "product_questions": [
        "答案必须引用来源吗？",
        "知识库更新频率和权限模型是什么？",
        "召回失败、引用错误、过期信息如何被发现？"
      ],
      "common_traps": [
        "只做向量相似度，不做召回评测。",
        "chunk 太大或太小都不管。",
        "把 RAG 当作 hallucination 的完全解药。"
      ],
      "source_ids": [
        "rag-paper",
        "mlabonne",
        "wdndev",
        "ai-eng-interview"
      ],
      "tags": [
        "RAG",
        "retrieval",
        "knowledge"
      ]
    },
    {
      "id": "embeddings",
      "title": "Embeddings：语义检索的地基，不是万能相似度",
      "layer": "application",
      "pm_takeaway": "Embedding 模型决定什么叫“相似”。AIPM 要关心领域、语言、query/document 长度、向量维度、成本、召回率，而不是只问用了哪个向量库。",
      "technical_core": "文本被编码成稠密向量，检索时用 cosine/dot product 等相似度找近邻。许多 RAG 失败来自 embedding 不适配或 query/document 表达不一致。",
      "product_questions": [
        "是否需要中英混合、代码、表格、专业术语 embedding？",
        "是否评估 recall@k、MRR、nDCG？",
        "是否需要 hybrid search 和 reranking？"
      ],
      "common_traps": [
        "把向量库品牌当成主要技术差异。",
        "没有负样本和困难查询集。",
        "只看 demo，不看系统性召回。"
      ],
      "source_ids": [
        "mlabonne",
        "ai-eng-interview",
        "wdndev"
      ],
      "tags": [
        "embedding",
        "retrieval",
        "RAG"
      ]
    },
    {
      "id": "agents",
      "title": "Agents：适合流程和工具，不适合无边界自治",
      "layer": "application",
      "pm_takeaway": "Agent 的价值在于把 LLM 连接到工具、状态和工作流；产品设计要控制动作空间、权限、回滚、观察信号和失败路径。越“自主”，越需要工程护栏。",
      "technical_core": "典型 agent loop 是 plan/think -> action/tool call -> observation -> next action。现代实现常用 function calling、workflow graph、memory、MCP 或特定框架。",
      "product_questions": [
        "用户任务是否真的需要多步工具调用？",
        "每个 tool call 的权限、审计和回滚是什么？",
        "失败时是让模型继续试，还是切换人工/规则流程？"
      ],
      "common_traps": [
        "把 chatbot 包一层工具就叫 agent。",
        "没有动作预算和终止条件。",
        "让模型直接执行高风险操作。"
      ],
      "source_ids": [
        "mlabonne",
        "wdndev",
        "ai-eng-interview"
      ],
      "tags": [
        "agent",
        "tools",
        "workflow"
      ]
    },
    {
      "id": "prompt-agent-product-ops",
      "title": "Prompt/Agent 产品能力：从模板转向任务、工具和成功标准",
      "layer": "application",
      "pm_takeaway": "Prompt 能力的重点不是背模板，而是把任务、输入输出结构、示例、约束、失败样例和成功标准讲清楚。Prompt 不够时，才进入换模型、加 RAG、加工具、加 workflow 或微调。",
      "technical_core": "Tool use/agent 的产品本质是：模型决定是否请求工具，应用负责执行工具并返回结果。PM 要设计触发边界、权限、日志、成本、失败重试、人工确认和回滚。",
      "product_questions": [
        "这个任务是 prompt 能解决，还是需要工具/检索/工作流？",
        "工具调用失败或返回脏数据时怎么办？",
        "成功标准和测试集是否先于 prompt 迭代定义？"
      ],
      "common_traps": [
        "把 prompt 当产品护城河。",
        "没有测试集就调 prompt。",
        "agent 可以执行高风险动作但没有权限边界。"
      ],
      "source_ids": [
        "openai-prompt",
        "anthropic-prompt",
        "anthropic-tool-use",
        "openai-agents-tools"
      ],
      "tags": [
        "prompt",
        "agent",
        "tools",
        "success-criteria"
      ]
    },
    {
      "id": "coding-agents",
      "title": "AI Coding Agents：从“帮我写代码”到软件生产系统",
      "layer": "application",
      "pm_takeaway": "面向开发者的 LLM 产品不能只看生成代码质量，还要设计上下文管理、工具调用、权限、测试、安全扫描、代码审查、部署后观测和人机协作边界。AI coding agent 是软件工程系统，不是聊天框。",
      "technical_core": "现代 coding agent 产品常包含 AI IDE、MCP/tool use、PRD/spec 驱动、agent autonomy levels、terminal automation、AI-generated tests、SAST/DAST、安全红队、AI code review 和 incident triage。",
      "product_questions": [
        "用户愿意把哪些开发动作交给 agent？",
        "上下文、repo 权限、工具权限和审计日志如何设计？",
        "生成代码如何通过测试、review、安全扫描和线上观测闭环？"
      ],
      "common_traps": [
        "把 demo 里的代码生成速度当成核心指标。",
        "没有权限模型就接入生产仓库和终端。",
        "只做 prompt，不做测试、review、回滚和观测。"
      ],
      "source_ids": [
        "modern-software",
        "mlabonne",
        "ai-eng-interview"
      ],
      "tags": [
        "coding-agent",
        "AI IDE",
        "MCP",
        "software-engineering"
      ]
    },
    {
      "id": "inference-cost",
      "title": "Inference：延迟、吞吐和成本是产品功能的一部分",
      "layer": "systems",
      "pm_takeaway": "用户体验不仅由模型质量决定，还由 TTFT、tokens/sec、并发、上下文长度、输出长度、缓存和服务稳定性决定。AIPM 要把成本和延迟写进 PRD。",
      "technical_core": "推理分 prefill 和 decode。长输入影响 prefill，长输出影响 decode。KV cache、continuous batching、PagedAttention、quantization、speculative decoding 都是常见优化。",
      "product_questions": [
        "首 token 延迟目标是多少？",
        "平均/峰值输出 token 数是多少？",
        "是否可以缓存、流式输出、限制上下文、分级路由模型？"
      ],
      "common_traps": [
        "只按输入请求数估算成本。",
        "忽视输出 token。",
        "上线后才发现并发和长文本把预算打爆。"
      ],
      "source_ids": [
        "vllm",
        "flashattention",
        "cs336",
        "mlabonne"
      ],
      "tags": [
        "inference",
        "cost",
        "latency"
      ]
    },
    {
      "id": "evaluation",
      "title": "Evaluation：没有任务评测，模型选择就是玄学",
      "layer": "evaluation",
      "pm_takeaway": "AIPM 最核心的技术杠杆之一是定义评测。通用榜单只能做初筛；产品上线要有任务集、失败分类、人工标注、LLM-as-judge 校准和线上监控。",
      "technical_core": "评测可以分为自动 benchmark、任务单元测试、RAG 检索评测、人类偏好、LLM judge、红队安全测试和线上业务指标。",
      "product_questions": [
        "黄金测试集覆盖哪些真实场景？",
        "失败是事实错、格式错、拒答错、召回错、工具错还是安全错？",
        "离线分数如何对应线上 KPI？"
      ],
      "common_traps": [
        "只看 MMLU/榜单。",
        "没有回归测试，prompt 一改全靠感觉。",
        "LLM judge 未校准就当真理。"
      ],
      "source_ids": [
        "helm",
        "arena",
        "cs336",
        "mlabonne"
      ],
      "tags": [
        "evaluation",
        "metrics",
        "quality"
      ]
    },
    {
      "id": "evals-risk-governance",
      "title": "Evals + 风险治理：AI PM 和普通 PM 的分水岭",
      "layer": "evaluation",
      "pm_takeaway": "AI PM 必须能回答“这个 AI 功能怎么算好、哪些失败不能上线”。这需要任务级 evals、失败分级、人工审核、灰度、反馈闭环，以及对公平、隐私、安全、透明和问责的基本治理意识。",
      "technical_core": "OpenAI Evals 类工具解决可重复测量，NIST/Microsoft/Google 的 responsible AI 框架帮助定义可信 AI 的治理边界。对企业 AI 产品，evals 和 risk management 是产品流程的一部分，不是上线前补文档。",
      "product_questions": [
        "离线评测能预测线上业务结果吗？",
        "哪些失败是低风险，哪些必须阻断上线？",
        "是否有人工审核、监控、申诉、回滚和审计机制？"
      ],
      "common_traps": [
        "只看准确率，不看严重失败。",
        "把安全合规放到发布前最后一天。",
        "没有灰度和反馈闭环。"
      ],
      "source_ids": [
        "openai-evals",
        "reforge-blog",
        "nist-ai-rmf",
        "microsoft-responsible-ai",
        "google-ai-principles"
      ],
      "tags": [
        "evals",
        "risk",
        "governance",
        "responsible-ai"
      ]
    },
    {
      "id": "data-quality",
      "title": "Data quality：数据工程决定模型/知识库上限",
      "layer": "data",
      "pm_takeaway": "无论预训练、SFT 还是 RAG，脏数据都会以幻觉、偏见、过期答案、重复答案和安全风险的形式回到产品。数据质量是产品质量的一部分。",
      "technical_core": "关键动作包括抽取、清洗、去重、PII 处理、版权/权限、质量过滤、分布监控、污染检测和更新机制。",
      "product_questions": [
        "知识源谁拥有？谁审核？多久更新？",
        "是否有 PII/敏感信息/版权限制？",
        "错误数据进入后如何发现和回滚？"
      ],
      "common_traps": [
        "只关心模型不关心数据管线。",
        "RAG 文档没有版本和权限。",
        "把爬来的网页直接入库。"
      ],
      "source_ids": [
        "cs336",
        "mlabonne",
        "wdndev"
      ],
      "tags": [
        "data",
        "quality",
        "governance"
      ]
    },
    {
      "id": "fine-tuning",
      "title": "Fine-tuning：适合稳定行为，不适合动态知识库",
      "layer": "training",
      "pm_takeaway": "微调适合让模型学稳定格式、风格、分类边界、工具调用习惯或垂直任务模式；不适合频繁变化的事实知识。多数 AIPM 场景应先比较 prompt/RAG/SFT 的收益成本。",
      "technical_core": "Full fine-tuning 更新全部参数，LoRA/QLoRA 只训练少量 adapter，成本低很多。SFT 数据质量通常比数量更重要。",
      "product_questions": [
        "要学的是知识还是行为？",
        "数据是否可持续生产和验证？",
        "微调后是否有回归评测和回滚方案？"
      ],
      "common_traps": [
        "用微调塞知识。",
        "没有 baseline 就开训。",
        "用生产私有数据训练但没有合规审查。"
      ],
      "source_ids": [
        "lora",
        "qlora",
        "mlabonne",
        "cs336"
      ],
      "tags": [
        "fine-tuning",
        "LoRA",
        "SFT"
      ]
    },
    {
      "id": "safety-security",
      "title": "Safety/Security：LLM 风险来自模型、数据、工具和用户交互",
      "layer": "risk",
      "pm_takeaway": "安全不是只加一句 system prompt。需要区分内容安全、隐私泄露、prompt injection、越权工具调用、数据投毒、评测绕过和业务误操作。",
      "technical_core": "LLM 应用常见防线包括输入/输出过滤、权限隔离、工具参数校验、检索源隔离、敏感数据脱敏、审计日志、红队测试和人工升级。",
      "product_questions": [
        "模型能访问哪些数据和工具？",
        "用户能否通过 prompt 越权改变规则？",
        "高风险动作是否需要确认和审计？"
      ],
      "common_traps": [
        "把 system prompt 当安全边界。",
        "RAG 检索到恶意指令后直接执行。",
        "没有权限模型和日志。"
      ],
      "source_ids": [
        "mlabonne",
        "cs336",
        "ai-eng-interview"
      ],
      "tags": [
        "safety",
        "security",
        "prompt-injection"
      ]
    },
    {
      "id": "pm-fundamentals",
      "title": "产品经理基本功：AI PM 仍然首先是 PM",
      "layer": "product craft",
      "pm_takeaway": "AI PM 不能只靠懂模型。长期能力仍然来自 product discovery、用户研究、原型、实验、定价、增长、沟通和组织推进。AI 只是改变问题空间，不会替代 PM 基本功。",
      "technical_core": "Lenny、SVPG、Reforge、Intercom 这类资源适合长期看：它们分别补 PM 案例密度、产品发现/产品模式、增长与实验、以及 AI customer support/B2B SaaS 落地。",
      "product_questions": [
        "我能否先证明用户问题值得做，再谈 AI 方案？",
        "有没有 prototype 或 concierge test，而不只是模型 demo？",
        "商业指标、采用率、定价和运营成本是否闭环？"
      ],
      "common_traps": [
        "把 AI 新鲜感当需求验证。",
        "忽略 go-to-market、定价和变更管理。",
        "只会和工程师聊模型，不会和业务方聊价值。"
      ],
      "source_ids": [
        "lenny",
        "svpg",
        "reforge-blog",
        "intercom-blog"
      ],
      "tags": [
        "PM",
        "product-discovery",
        "growth",
        "B2B"
      ]
    }
  ],
  "qa": [
    {
      "question": "为什么 LLM 通常是 decoder-only 架构？",
      "short_answer": "因为通用生成任务需要自回归预测下一个 token，decoder-only 结构简单、可扩展、训练/推理路径统一，适合大规模生成。",
      "pm_angle": "PM 只需记住：它天生擅长续写/生成，不天然保证事实正确或流程正确。",
      "tags": [
        "architecture"
      ],
      "source_ids": [
        "cs336",
        "wdndev"
      ]
    },
    {
      "question": "AIPM 需要把神经网络学到多深？",
      "short_answer": "需要懂输入、权重、激活、loss、gradient、attention、token 和 decoding 的直觉，能判断问题属于数据、模型、上下文、训练、推理还是评测层。",
      "pm_angle": "目标不是成为研究员，而是能把技术约束翻译成产品方案、评测指标和风险边界。",
      "tags": [
        "learning",
        "foundations"
      ],
      "source_ids": [
        "alisa-book",
        "cs336"
      ]
    },
    {
      "question": "为什么 PM 也要理解 loss 和 gradient？",
      "short_answer": "因为模型训练是在优化一个代理目标，gradient 只是告诉参数朝哪个方向能让这个目标变好。代理目标和真实用户价值不一致时，模型会“认真地优化错东西”。",
      "pm_angle": "这能帮助你追问数据标注、评测集、偏好目标和线上 KPI 是否对齐。",
      "tags": [
        "training",
        "evaluation"
      ],
      "source_ids": [
        "alisa-book",
        "cs336",
        "dpo"
      ]
    },
    {
      "question": "AI PM 真正要补哪 6 类能力？",
      "short_answer": "AI 基础认知、AI 产品设计、Prompt/LLM 产品能力、Agent/工具调用、AI 产品评估、负责任 AI/风险治理。产品基本功要长期补。",
      "pm_angle": "用这 6 类来筛资源，比刷新闻和收藏 prompt 模板更有效。",
      "tags": [
        "learning",
        "AI PM"
      ],
      "source_ids": [
        "ai-for-everyone",
        "pair-guidebook",
        "openai-evals",
        "nist-ai-rmf"
      ]
    },
    {
      "question": "CS336 对 AIPM 或转岗面试的价值是什么？",
      "short_answer": "它把 tokenization、Transformer、训练、推理、数据、评测和对齐串成一张完整地图，适合作为补齐 LLM 技术宽度的骨架。",
      "pm_angle": "学完不代表能做研究，但能更快定位问题、追问实验、理解工程实现，并和技术团队对齐。",
      "tags": [
        "learning",
        "career"
      ],
      "source_ids": [
        "cs336",
        "alisa-job-search"
      ]
    },
    {
      "question": "AI PM 面试和普通 PM 面试最大差异是什么？",
      "short_answer": "普通 PM 面试更偏用户、商业和执行；AI PM 面试会额外追问数据来源、模型边界、评测、延迟成本、风险、人工兜底和 demo 到生产的可行性。",
      "pm_angle": "回答时要把 AI 不确定性纳入产品方案，而不是把模型当确定性 API。",
      "tags": [
        "interview",
        "AI PM"
      ],
      "source_ids": [
        "bi-salesforce-pm",
        "bi-agentforce"
      ]
    },
    {
      "question": "面试中如何证明自己真的懂 AI，而不是只会用 AI？",
      "short_answer": "最强证据是 proof-of-work：做过真实 AI feature、从零实现过关键组件、搭过 RAG/agent demo、做过评测集，或能讲清一次失败和改进。",
      "pm_angle": "简历和面试都要突出用户问题、技术取舍、指标结果和反思，而不是工具清单。",
      "tags": [
        "career",
        "proof-of-work"
      ],
      "source_ids": [
        "bi-break-into-ai",
        "bi-tmobile-ai-pm",
        "alisa-job-search"
      ]
    },
    {
      "question": "AI product case 可以用什么答题框架？",
      "short_answer": "先定义用户任务和成功标准，再讲数据/权限、模型或 RAG/agent 方案、评测指标、失败模式、成本延迟、安全、上线节奏和反馈闭环。",
      "pm_angle": "这套框架能让开放题从“创意题”变成可验证的产品系统设计题。",
      "tags": [
        "case-interview",
        "evaluation"
      ],
      "source_ids": [
        "bi-salesforce-pm",
        "modern-software",
        "helm"
      ]
    },
    {
      "question": "什么时候 prompt 不够，需要升级方案？",
      "short_answer": "当问题来自知识缺失、上下文检索差、工具动作、结构化输出稳定性、权限、安全或可评测一致性时，单纯调 prompt 往往不够。",
      "pm_angle": "PM 要把 prompt baseline 当起点，再决定是否加 RAG、工具、workflow、微调或 eval regression。",
      "tags": [
        "prompt",
        "agent"
      ],
      "source_ids": [
        "openai-prompt",
        "anthropic-prompt",
        "anthropic-tool-use"
      ]
    },
    {
      "question": "Responsible AI 对 PM 具体意味着什么？",
      "short_answer": "不是价值观口号，而是把公平、可靠安全、隐私安全、透明、问责、人类控制和风险管理纳入需求、设计、评测和上线流程。",
      "pm_angle": "企业 AI 产品越接近金融、广告、客服、HR、协同和决策，就越需要这套治理语言。",
      "tags": [
        "responsible-ai",
        "risk"
      ],
      "source_ids": [
        "nist-ai-rmf",
        "microsoft-responsible-ai",
        "google-ai-principles"
      ]
    },
    {
      "question": "MCP 对 PM 意味着什么？",
      "short_answer": "MCP 是让模型/agent 连接外部工具和数据源的一类协议与生态，产品意义是把工具接入变成可复用、可治理、可授权的接口层。",
      "pm_angle": "做 agent 产品时，要把 MCP 当成权限、审计、工具质量和开发者生态问题，而不只是技术集成。",
      "tags": [
        "agent",
        "MCP"
      ],
      "source_ids": [
        "modern-software"
      ]
    },
    {
      "question": "AI coding agent 产品应该评测什么？",
      "short_answer": "不仅评测代码是否能运行，还要评测上下文命中率、测试通过率、补丁可读性、安全风险、review 接受率、回滚能力、任务完成时间和人工介入成本。",
      "pm_angle": "面向开发者的 LLM 产品，真正的 KPI 是交付质量和信任，而不是生成 token 数。",
      "tags": [
        "coding-agent",
        "evaluation"
      ],
      "source_ids": [
        "modern-software",
        "helm"
      ]
    },
    {
      "question": "Token、word、character 有什么区别？",
      "short_answer": "Token 是模型处理和计费的基本单位，可能是字符、子词、词片段或字节组合，不等于自然语言中的词。",
      "pm_angle": "估算成本和上下文时用 token，不要用字数。",
      "tags": [
        "token"
      ],
      "source_ids": [
        "cs336",
        "rasbt"
      ]
    },
    {
      "question": "Embedding 是什么？",
      "short_answer": "Embedding 是把离散对象映射到向量空间的表示，语义相近的文本通常向量距离更近。",
      "pm_angle": "RAG 质量很大程度取决于 embedding 是否适配业务语料。",
      "tags": [
        "embedding",
        "RAG"
      ],
      "source_ids": [
        "mlabonne",
        "ai-eng-interview"
      ]
    },
    {
      "question": "Attention 解决了什么问题？",
      "short_answer": "Attention 让每个 token 动态关注其他 token，从而建模长距离依赖和上下文关系。",
      "pm_angle": "它解释了模型为何能处理上下文，也解释了长上下文为什么贵。",
      "tags": [
        "attention"
      ],
      "source_ids": [
        "attention",
        "cs336"
      ]
    },
    {
      "question": "为什么 attention 里有 Q/K/V？",
      "short_answer": "Q 表示当前 token 想找什么，K 表示其他 token 提供什么索引，V 是被聚合的信息内容。",
      "pm_angle": "这能帮助 PM 理解“检索”和“上下文关联”的类比，但不要过度拟人化。",
      "tags": [
        "attention"
      ],
      "source_ids": [
        "ai-eng-interview"
      ]
    },
    {
      "question": "KV cache 是什么？",
      "short_answer": "生成时缓存历史 token 的 Key/Value，避免每生成一个 token 都重新计算全部历史。",
      "pm_angle": "KV cache 提升速度但吃显存；长对话和长文档会显著推高成本。",
      "tags": [
        "inference"
      ],
      "source_ids": [
        "vllm",
        "cs336"
      ]
    },
    {
      "question": "RAG 和 fine-tuning 怎么选？",
      "short_answer": "动态知识、私有知识、可引用知识优先 RAG；稳定格式、风格、任务行为可考虑 fine-tuning。",
      "pm_angle": "先问“缺知识还是缺行为”。这是最实用的一条。",
      "tags": [
        "RAG",
        "fine-tuning"
      ],
      "source_ids": [
        "mlabonne",
        "rag-paper"
      ]
    },
    {
      "question": "RAG 为什么还会 hallucinate？",
      "short_answer": "可能是没召回、召回错、上下文太噪、模型没遵守引用、问题本身超出证据，或评测没覆盖。",
      "pm_angle": "RAG 不是事实保险，需要检索评测和答案评测两套指标。",
      "tags": [
        "RAG",
        "evaluation"
      ],
      "source_ids": [
        "rag-paper",
        "mlabonne"
      ]
    },
    {
      "question": "LoRA/QLoRA 解决什么问题？",
      "short_answer": "它们降低微调显存和计算成本，LoRA 训练低秩 adapter，QLoRA 结合量化进一步节省显存。",
      "pm_angle": "适合试验垂直行为微调，但仍然需要高质量数据和评测。",
      "tags": [
        "fine-tuning"
      ],
      "source_ids": [
        "lora",
        "qlora"
      ]
    },
    {
      "question": "RLHF、DPO、GRPO 大概差别是什么？",
      "short_answer": "RLHF 通常包含奖励模型和策略优化；DPO 用偏好对直接优化；GRPO 常用于可验证奖励下的推理训练。",
      "pm_angle": "不用深背公式，重点是它们改变偏好和任务行为，不是修复知识库的首选。",
      "tags": [
        "alignment"
      ],
      "source_ids": [
        "instructgpt",
        "dpo",
        "cs336"
      ]
    },
    {
      "question": "为什么模型榜单高不等于产品好？",
      "short_answer": "榜单任务和真实用户分布不同，产品还受延迟、成本、稳定性、安全、格式遵循和工具链影响。",
      "pm_angle": "模型选型要用自己的黄金集和线上目标闭环。",
      "tags": [
        "evaluation"
      ],
      "source_ids": [
        "helm",
        "arena"
      ]
    },
    {
      "question": "Prompt engineering 什么时候够用？",
      "short_answer": "当任务主要是格式、语气、步骤约束、少量示例和轻量知识注入时，prompt/template 通常最快。",
      "pm_angle": "先做 prompt baseline，再决定是否上 RAG 或微调。",
      "tags": [
        "prompt"
      ],
      "source_ids": [
        "mlabonne",
        "ai-eng-interview"
      ]
    },
    {
      "question": "为什么长上下文不是 RAG 的替代品？",
      "short_answer": "长上下文能塞更多材料，但不保证检索、排序、证据使用和成本可控。RAG 还能处理权限、更新和引用。",
      "pm_angle": "长上下文适合少量长材料精读，企业知识库仍要检索系统。",
      "tags": [
        "context",
        "RAG"
      ],
      "source_ids": [
        "cs336",
        "mlabonne"
      ]
    },
    {
      "question": "什么是 temperature/top-p/top-k？",
      "short_answer": "它们控制采样随机性：temperature 改变分布尖锐程度，top-k/top-p 限制候选 token 集合。",
      "pm_angle": "事实型任务降低随机性，创意型任务可提高随机性，但要监控一致性。",
      "tags": [
        "decoding"
      ],
      "source_ids": [
        "wdndev",
        "mlabonne"
      ]
    },
    {
      "question": "为什么首 token 延迟常常很高？",
      "short_answer": "模型要先处理完整输入上下文完成 prefill，然后才进入逐 token decode。长输入会拖慢首 token。",
      "pm_angle": "流式输出只能改善感知，不会消灭 prefill 成本。",
      "tags": [
        "inference"
      ],
      "source_ids": [
        "cs336",
        "vllm"
      ]
    },
    {
      "question": "什么是 prompt injection？",
      "short_answer": "用户或检索文档中的恶意指令试图覆盖系统规则或诱导模型泄露/越权。",
      "pm_angle": "凡是有 RAG 或工具调用的产品都要把它当安全需求，而非边缘问题。",
      "tags": [
        "security"
      ],
      "source_ids": [
        "mlabonne"
      ]
    },
    {
      "question": "SFT 数据越多越好吗？",
      "short_answer": "不是。高质量、多样、覆盖目标行为的数据通常比低质量大规模数据更重要。",
      "pm_angle": "AIPM 要设计数据生产、审核和失败样本回流机制。",
      "tags": [
        "SFT",
        "data"
      ],
      "source_ids": [
        "cs336",
        "mlabonne"
      ]
    },
    {
      "question": "MoE 对产品意味着什么？",
      "short_answer": "MoE 用稀疏激活扩大参数容量，可能提升性价比，但路由、稳定性和服务复杂度更高。",
      "pm_angle": "选模型时关心实际延迟/成本/稳定性，不必只看参数总量。",
      "tags": [
        "MoE"
      ],
      "source_ids": [
        "cs336",
        "wdndev"
      ]
    },
    {
      "question": "量化会损失效果吗？",
      "short_answer": "可能。低精度能降显存和成本，但对困惑任务、长上下文、数学/代码等可能有质量影响。",
      "pm_angle": "量化是成本手段，必须用产品任务集回归。",
      "tags": [
        "quantization"
      ],
      "source_ids": [
        "mlabonne"
      ]
    },
    {
      "question": "Agent 和 workflow 有什么区别？",
      "short_answer": "Workflow 是预定义流程，agent 让模型动态选择下一步和工具。",
      "pm_angle": "能用 workflow 就先用 workflow；agent 留给路径变化大且收益明确的场景。",
      "tags": [
        "agent"
      ],
      "source_ids": [
        "mlabonne",
        "ai-eng-interview"
      ]
    }
  ],
  "decisions": [
    {
      "title": "Prompt-only",
      "use_when": [
        "任务简单、知识稳定、只需格式/语气控制",
        "需要快速验证需求",
        "错误成本低"
      ],
      "avoid_when": [
        "需要私有/实时知识",
        "需要可引用证据",
        "复杂多步工具动作"
      ],
      "pm_checks": [
        "先做 20-50 条黄金样例",
        "记录 prompt 版本",
        "设置格式校验和回归测试"
      ]
    },
    {
      "title": "RAG",
      "use_when": [
        "企业知识库、政策、文档问答、客服知识、实时信息",
        "答案需要引用和权限控制",
        "知识频繁更新"
      ],
      "avoid_when": [
        "问题主要是输出风格或固定任务行为",
        "文档质量不可控且无治理",
        "用户问题无法映射到文本证据"
      ],
      "pm_checks": [
        "评估 recall@k 与答案 groundedness",
        "设计 chunking、rerank、权限和引用",
        "建立失败样本回流"
      ]
    },
    {
      "title": "Fine-tuning / LoRA",
      "use_when": [
        "需要稳定格式、领域风格、分类边界、工具调用模式",
        "有高质量标注/示范数据",
        "prompt 成本过高或一致性不够"
      ],
      "avoid_when": [
        "只是缺最新知识",
        "没有评测集",
        "数据合规不清楚"
      ],
      "pm_checks": [
        "先比较 prompt/RAG baseline",
        "固定训练/验证/回归集",
        "明确回滚和模型版本管理"
      ]
    },
    {
      "title": "Agent / Tool use",
      "use_when": [
        "任务需要多步操作、查数据、调用系统、写入状态",
        "路径因用户和环境变化而变化",
        "工具结果能被验证"
      ],
      "avoid_when": [
        "只需单轮问答",
        "高风险动作无权限/确认",
        "工具 API 不稳定或无审计"
      ],
      "pm_checks": [
        "限定 tool schema 和动作预算",
        "高风险操作二次确认",
        "记录 action/observation 日志"
      ]
    },
    {
      "title": "Bigger model",
      "use_when": [
        "小模型已通过工程优化仍无法满足推理/语言/鲁棒性",
        "用户愿意为质量支付延迟和成本",
        "任务依赖强泛化能力"
      ],
      "avoid_when": [
        "问题是知识没给、检索差、评测差或流程差",
        "预算/延迟硬约束",
        "边际收益未验证"
      ],
      "pm_checks": [
        "用同一黄金集比较质量、延迟和成本",
        "考虑路由：简单任务小模型，难任务大模型",
        "监控输出长度和失败类型"
      ]
    },
    {
      "title": "AI PM interview prep",
      "use_when": [
        "准备转 AI PM/AIPM/agentic AI PM",
        "面试包含 AI case、technical discussion 或 design exercise",
        "需要把 CS336/八股转成面试表达"
      ],
      "avoid_when": [
        "只是泛泛了解 LLM 概念",
        "没有目标岗位 JD",
        "没有任何 proof-of-work 可讲"
      ],
      "pm_checks": [
        "准备 2 个 AI 项目故事：问题、方案、指标、失败、复盘",
        "练 3 类 case：RAG、agent、AI feature metrics",
        "把技术概念翻译成 PM 决策语言"
      ]
    },
    {
      "title": "AI PM learning sprint",
      "use_when": [
        "不知道该看什么资源",
        "准备 AI PM/商业化 PM/企业 AI 产品岗",
        "需要把学习变成面试和项目输出"
      ],
      "avoid_when": [
        "已经有明确技术深挖目标",
        "只是想追行业新闻",
        "没有时间做练习和沉淀"
      ],
      "pm_checks": [
        "第一周补 AI for Everyone + PAIR，产出 AI 产品设计 checklist",
        "第二周补 prompt/tool/evals，产出 20 条测试集和 1 个 demo",
        "第三周补 responsible AI + PM craft，产出 case answer 和风险清单"
      ]
    }
  ],
  "glossary": {
    "Token": "模型处理和计费的基本文本单位。",
    "Context window": "模型一次可接收的 token 上限，包括系统提示、用户输入、检索内容和历史。",
    "Embedding": "把文本等对象编码成向量，用于语义检索、聚类、相似度计算。",
    "Transformer": "以 attention 为核心的主流 LLM 架构。",
    "Attention": "让 token 根据相关性聚合上下文信息的机制。",
    "MLP": "Multi-layer perceptron，多层全连接网络，是理解深度学习前向计算和反向传播的基础模型。",
    "Activation function": "给神经网络加入非线性的函数，如 ReLU、GELU、SwiGLU；没有非线性，多层线性层仍等价于一个线性变换。",
    "Loss": "训练时被优化的可计算目标，用来衡量模型输出与目标之间的差距。",
    "Gradient": "表示参数或输入发生微小变化时，loss 或输出会如何变化的方向和幅度。",
    "Backpropagation": "把 loss 的误差信号从输出层传回各层以计算梯度的算法。",
    "MCP": "Model Context Protocol，用于让模型或 agent 连接外部工具、数据源和服务的接口协议生态。",
    "Coding agent": "能读取代码上下文、调用工具、修改文件、运行测试并迭代解决开发任务的 LLM agent。",
    "AI IDE": "把 LLM、上下文检索、代码编辑、工具调用和开发工作流集成进 IDE 的产品形态。",
    "AI code review": "用模型辅助发现代码质量、安全、可维护性和测试覆盖问题的 review 工作流。",
    "Observability": "通过 logs、metrics、traces 和事件记录理解系统运行状态，LLM/agent 产品还需要记录 prompt、tool call、检索和评测信号。",
    "Proof-of-work": "能证明你真的做过 AI 产品或技术实践的作品、项目、demo、评测或复盘。",
    "AI product case": "围绕 AI 功能、agent、推荐、自动化或智能助手设计的产品 case 面试题。",
    "Technical discussion": "不一定写代码，但会围绕模型、数据、实验、系统和业务取舍深入追问的技术面试。",
    "Hiring manager outreach": "直接联系招聘经理或团队负责人，用匹配度和作品争取面试机会的求职动作。",
    "Success criteria": "在 prompt、eval、PRD 或实验开始前定义的成功标准，用于判断 AI 功能是否达标。",
    "Human-in-the-loop": "在人机协作中保留人工确认、审核、接管或回滚的机制，常用于高风险 AI 动作。",
    "Responsible AI": "把公平、可靠安全、隐私安全、透明、问责和人类控制纳入 AI 产品生命周期的治理实践。",
    "Product discovery": "在投入大规模建设前验证用户问题、价值、可用性和可行性的产品探索过程。",
    "Decoder-only": "自回归生成架构，GPT 类模型常见。",
    "Pretraining": "用海量数据学习通用语言/知识能力。",
    "SFT": "Supervised Fine-Tuning，用示范数据训练模型遵循目标行为。",
    "RLHF": "Reinforcement Learning from Human Feedback，用人类偏好训练模型行为。",
    "DPO": "Direct Preference Optimization，直接用偏好对优化模型。",
    "GRPO": "Group Relative Policy Optimization，常用于可验证奖励的推理训练场景。",
    "RAG": "Retrieval-Augmented Generation，生成前检索外部知识。",
    "Reranker": "对初步召回文档重新排序以提升相关性。",
    "KV cache": "推理时缓存历史 Key/Value，加速逐 token 生成。",
    "TTFT": "Time To First Token，首 token 延迟。",
    "Throughput": "单位时间可生成/处理的 token 或请求量。",
    "Quantization": "用低精度表示权重/激活以降低显存和成本。",
    "LoRA": "低秩适配器微调方法，只训练少量参数。",
    "Hallucination": "模型输出不受证据支持或事实错误的内容。",
    "Prompt injection": "通过输入或文档中的恶意指令劫持模型行为。"
  }
}