{"@context":"https://schema.org","@graph":[{"@type":"WebPage","@id":"https://inquir.org/use-cases/llm-pipelines#webpage","url":"https://inquir.org/use-cases/llm-pipelines","name":"LLM pipelines and serverless AI workflows","headline":"LLM pipelines and serverless AI workflows","description":"LLM pipelines with retrieval, moderation, tool calls, summarization, retries, and cost control—each stage is a serverless function with traces instead of one giant prompt.","inLanguage":"en-US","isPartOf":{"@id":"https://inquir.org/#website"},"author":{"@type":"Organization","name":"Inquir"},"datePublished":"2025-01-01T00:00:00.000Z","dateModified":"2026-04-20T00:00:00.000Z","citation":"https://inquir.org/docs"},{"@type":"BreadcrumbList","itemListElement":[{"@type":"ListItem","position":1,"name":"Home","item":"https://inquir.org"},{"@type":"ListItem","position":2,"name":"Use cases","item":"https://inquir.org/use-cases"},{"@type":"ListItem","position":3,"name":"LLM pipelines & AI workflows","item":"https://inquir.org/use-cases/llm-pipelines"}]},{"@type":"FAQPage","@id":"https://inquir.org/use-cases/llm-pipelines#faq","url":"https://inquir.org/use-cases/llm-pipelines","isPartOf":{"@id":"https://inquir.org/use-cases/llm-pipelines#webpage"},"mainEntity":[{"@type":"Question","name":"Why split an LLM workflow into stages?","acceptedAnswer":{"@type":"Answer","text":"Retries, cost attribution, and debugging improve when retrieval, moderation, tool calls, and summarization are separate steps with their own logs."}},{"@type":"Question","name":"How do I stream tokens to end users?","acceptedAnswer":{"@type":"Answer","text":"Keep user-visible streaming at the boundary; internal stages can use request/response for simpler failure handling and replays."}},{"@type":"Question","name":"How do I control cost across stages?","acceptedAnswer":{"@type":"Answer","text":"Measure tokens and wall time per stage in observability; cap expensive steps with budgets and short-circuit when moderation fails."}}]}]}