ACR_NAME="<YOUR_ACR_NAME>"

#######################################
# Mirror one public image into the private ACR: pull, re-tag, push.
# Globals:   ACR_NAME (read)
# Arguments: $1 - source image reference, e.g. prom/prometheus:latest
#            $2 - repository path and tag to use inside the ACR
# Returns:   non-zero as soon as any docker step fails, so a failed pull
#            is never followed by pushing a stale local image.
#######################################
mirror_image() {
  local source_image=$1
  local target_image="${ACR_NAME}.azurecr.io/$2"
  docker pull "${source_image}" \
    && docker tag "${source_image}" "${target_image}" \
    && docker push "${target_image}"
}

# OTEL Collector
mirror_image otel/opentelemetry-collector-contrib:latest otel/opentelemetry-collector-contrib:latest
# Zipkin
mirror_image openzipkin/zipkin:latest openzipkin/zipkin:latest
# Prometheus
mirror_image prom/prometheus:latest prom/prometheus:latest
# Aspire Dashboard (the mcr.microsoft.com host prefix is dropped in the ACR path)
mirror_image mcr.microsoft.com/dotnet/aspire-dashboard:latest dotnet/aspire-dashboard:latest
cd /helm-otel-chd
helm package .
helm push <helm-chart-name>-<version>.tgz oci://<ACR_NAME>.azurecr.io/<target_repo>
|
|
Replace all <PLACEHOLDER> values in helm-otel-chd/values-azure.yaml.
|
# values-azure.yaml: replace every <PLACEHOLDER> before installing the chart.

# ACR token settings.
# NOTE(review): presumably used by the chart to create the image pull secret
# named by secretName; confirm against the chart templates.
acrToken:
  enabled: true
  registry: "<YOUR_ACR_NAME>.azurecr.io"
  tokenName: "<TOKEN_NAME>"
  tokenPassword: "<TOKEN_PASSWORD>"
  secretName: "<SECRET_NAME>"

# Image repositories; the paths match the targets of the docker push commands.
otelCollector:
  image:
    repository: <YOUR_ACR_NAME>.azurecr.io/otel/opentelemetry-collector-contrib
prometheus:
  image:
    repository: <YOUR_ACR_NAME>.azurecr.io/prom/prometheus
zipkin:
  image:
    repository: <YOUR_ACR_NAME>.azurecr.io/openzipkin/zipkin
aspireDashboard:
  image:
    repository: <YOUR_ACR_NAME>.azurecr.io/dotnet/aspire-dashboard
az aks get-credentials \
--resource-group <AKS_RESOURCE_GROUP> \
--name <AKS_NAME>
kubectl get namespace
helm upgrade --install <release_name> \
oci://<ACR_NAME>.azurecr.io/<PATH_TO_HELM_REPO> \
--version <VERSION> -n <NAMESPACE> --create-namespace -f values-azure.yaml
# OTEL export settings; the endpoint is the collector service's in-cluster
# FQDN (<service>.<namespace>.svc.cluster.local) followed by the port.
otel:
  enableOtelCollector: true
  otelExporterEndpoint: "otel-collector-chd.cbai-otel.svc.cluster.local:4317"
|
|
The complete FQDN, <service>.<namespace>.svc.cluster.local, is required because cb-ai-service runs in the cb-ai-service namespace, while the OTEL stack runs in the cbai-otel namespace.
|
|
|
Before pushing the image to Azure Container Registry (ACR), tag it correctly.
Use the loaded image tag as-is after docker load. Refer to the output of step 4 for the tag.
|
cd /helm-chd
helm package .
helm push <helm-chart-name>-<version>.tgz oci://<ACR_NAME>.azurecr.io/<target_repo>
# -------------------------------
# ACR Token Configuration
# -------------------------------
acrToken:
  enabled: true
  registry: "<ACR_NAME>.azurecr.io"
  tokenName: "<ACR_TOKEN_USERNAME>"
  tokenPassword: "<ACR_TOKEN_PASSWORD>"
  secretName: "<secret_name>"
# -------------------------------
# Image Configuration
# -------------------------------
image:
  domain: "<ACR_NAME>.azurecr.io"
  repository: "<repository_name>"
  pullPolicy: IfNotPresent
  tag: "<IMAGE_TAG>"
# -------------------------------
# Workload Identity
# -------------------------------
workloadIdentity:
  enabled: true
  clientId: "<USER_ASSIGNED_MANAGED_IDENTITY_CLIENT_ID>"
  tenantId: "<AZURE_TENANT_ID>"
  tokenExpirationSeconds: 3600
# -------------------------------
# Azure Configuration
# -------------------------------
# NOTE(review): nesting of `auth` under `azure` is reconstructed from the
# section layout; confirm against the chart's default values.yaml.
azure:
  openai:
    baseUrl: "https://<OPENAI_RESOURCE_NAME>.openai.azure.com/"
  auth:
    tenantId: "<AZURE_TENANT_ID>"
    audience: "<APP_REGISTRATION_CLIENT_ID>"
az aks get-credentials \
--resource-group <AKS_RESOURCE_GROUP> \
--name <AKS_NAME>
helm upgrade --install cb-ai-service \
oci://<ACR_NAME>.azurecr.io/<PATH_TO_HELM_REPO> \
--version <VERSION> -n <NAMESPACE> -f values-azure-customer-hosted.yaml
az ad app credential reset \
--id <client_id> \
--display-name "cbai-client-secret" \
--years 1
#!/usr/bin/env python3
"""Fetch an Azure Entra ID access token via client credentials.
All identity parameters (client-id, client-secret, tenant-id, scope) must be
supplied by the caller – either as CLI flags or via environment variables.
Usage example (prefers environment variables to keep secrets out of shell history):
export CB_CUSTOMER_HOSTED_CLIENT_ID="<client id>"
export CB_CUSTOMER_HOSTED_CLIENT_SECRET="<client-secret>"
export CB_CUSTOMER_HOSTED_TENANT_ID="<tenant id>"
export CB_CUSTOMER_HOSTED_SCOPE="api://<app-id>/.default"
python scripts/get_customer_hosted_token.py
Pass --json to inspect the raw Azure response.
"""
from __future__ import annotations
import argparse
import json
import os
import sys
from typing import Any
import httpx
DEFAULT_TOKEN_URL_TEMPLATE = (
"https://login.microsoftonline.com/{tenant_id}/oauth2/v2.0/token"
)
class TokenRetrievalError(RuntimeError):
    """Error signalling that MS Entra ID did not yield a usable token.

    Raised by ``fetch_token`` for an error response and by
    ``get_chd_auth_header`` when the payload has no ``access_token``.
    """
def _build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the token helper.

    Every identity option (--client-id, --client-secret, --tenant-id, --scope)
    defaults to its ``CB_CUSTOMER_HOSTED_*`` environment variable, or ``None``
    when that variable is unset. ``--timeout`` defaults to
    ``CB_CUSTOMER_HOSTED_TIMEOUT`` or 10 seconds, and ``--json`` is a flag.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    parser = argparse.ArgumentParser(
        description="Retrieve an Azure Entra ID access token with client credentials",
    )
    parser.add_argument(
        "--client-id",
        default=os.environ.get("CB_CUSTOMER_HOSTED_CLIENT_ID"),
        help="Application (client) ID. (env: CB_CUSTOMER_HOSTED_CLIENT_ID)",
    )
    parser.add_argument(
        "--client-secret",
        default=os.environ.get("CB_CUSTOMER_HOSTED_CLIENT_SECRET"),
        help="Client secret (env: CB_CUSTOMER_HOSTED_CLIENT_SECRET).",
    )
    parser.add_argument(
        "--tenant-id",
        default=os.environ.get("CB_CUSTOMER_HOSTED_TENANT_ID"),
        help="Directory (tenant) ID. (env: CB_CUSTOMER_HOSTED_TENANT_ID)",
    )
    parser.add_argument(
        "--scope",
        default=os.environ.get("CB_CUSTOMER_HOSTED_SCOPE"),
        help="Target scope, e.g. api://<app-id>/.default (env: CB_CUSTOMER_HOSTED_SCOPE).",
    )
    parser.add_argument(
        "--timeout",
        type=float,
        # Pass the raw string and let argparse apply ``type`` itself: string
        # defaults are converted lazily, so a malformed
        # CB_CUSTOMER_HOSTED_TIMEOUT produces a normal usage error (and only
        # when --timeout is not given) instead of a ValueError traceback while
        # the parser is being built. An empty value falls back to 10 seconds.
        default=os.environ.get("CB_CUSTOMER_HOSTED_TIMEOUT") or "10",
        help="HTTP timeout in seconds (default: 10).",
    )
    parser.add_argument(
        "--json",
        action="store_true",
        help="Print raw JSON response instead of just the access token.",
    )
    return parser
def _build_token_url(tenant_id: str) -> str:
    """Return the token endpoint URL with ``tenant_id`` filled into the template."""
    token_url = DEFAULT_TOKEN_URL_TEMPLATE.format(tenant_id=tenant_id)
    return token_url
def fetch_token(
    *, client_id: str, client_secret: str, scope: str, tenant_id: str, timeout: float
) -> dict[str, Any]:
    """Request a token from the tenant's endpoint with the client-credentials grant.

    The credentials and scope are POSTed via ``httpx`` (``data=``) to the URL
    produced by ``_build_token_url(tenant_id)``.

    Returns:
        The JSON-decoded body of the response.

    Raises:
        ValueError: if ``client_secret`` is empty.
        TokenRetrievalError: if the response is flagged as an error; the
            message carries the status code and the stripped response text.
    """
    if not client_secret:
        raise ValueError(
            "Client secret is required. Provide --client-secret or CB_CUSTOMER_HOSTED_CLIENT_SECRET."
        )

    endpoint = _build_token_url(tenant_id)
    form_fields = dict(
        client_id=client_id,
        client_secret=client_secret,
        scope=scope,
        grant_type="client_credentials",
    )

    with httpx.Client(timeout=timeout) as http:
        reply = http.post(endpoint, data=form_fields)
        if not reply.is_error:
            return reply.json()
        raise TokenRetrievalError(
            f"Token request failed ({reply.status_code}): {reply.text.strip()}"
        )
def get_chd_auth_header(
    *,
    client_id: str,
    client_secret: str,
    scope: str,
    tenant_id: str,
    timeout: float = 10.0,
) -> dict[str, str]:
    """Fetch a token and wrap it in an ``Authorization: Bearer`` header dict.

    ``scope`` gets ``/.default`` appended unless it already ends with it; all
    other arguments are passed through to ``fetch_token`` unchanged.

    Raises:
        TokenRetrievalError: if the token response has no ``access_token``
            (in addition to whatever ``fetch_token`` raises).
    """
    default_suffix = "/.default"
    if scope.endswith(default_suffix):
        token_scope = scope
    else:
        token_scope = f"{scope}{default_suffix}"

    payload = fetch_token(
        client_id=client_id,
        client_secret=client_secret,
        scope=token_scope,
        tenant_id=tenant_id,
        timeout=timeout,
    )

    bearer = payload.get("access_token")
    if bearer:
        return {"Authorization": f"Bearer {bearer}"}
    raise TokenRetrievalError("Token response did not contain 'access_token'")
def main() -> int:
    """Command-line entry point: parse arguments, fetch a token, print it.

    Prints the access token to stdout, or the whole JSON response when
    ``--json`` is given or the response has no ``access_token``.

    Returns:
        0 on success; 1 when a required parameter is missing or the token
        request fails (the reason is written to stderr).
    """
    args = _build_parser().parse_args()

    # Pair each required value with the label shown when it is absent.
    required = (
        (args.client_id, "--client-id / CB_CUSTOMER_HOSTED_CLIENT_ID"),
        (args.client_secret, "--client-secret / CB_CUSTOMER_HOSTED_CLIENT_SECRET"),
        (args.tenant_id, "--tenant-id / CB_CUSTOMER_HOSTED_TENANT_ID"),
        (args.scope, "--scope / CB_CUSTOMER_HOSTED_SCOPE"),
    )
    missing = [label for value, label in required if not value]
    if missing:
        header = "Error: the following required parameters are missing:\n "
        print(header + "\n ".join(missing), file=sys.stderr)
        return 1

    try:
        token_response = fetch_token(
            client_id=args.client_id,
            client_secret=args.client_secret,
            scope=args.scope,
            tenant_id=args.tenant_id,
            timeout=args.timeout,
        )
    except (TokenRetrievalError, ValueError, httpx.HTTPError) as error:
        print(f"Error: {error}", file=sys.stderr)
        return 1

    # With --json the token is never extracted; the raw response is dumped.
    access_token = None if args.json else token_response.get("access_token")
    if access_token:
        print(access_token)
    else:
        print(json.dumps(token_response, indent=2))
    return 0
# Script entry point: the process exit status is whatever main() returns.
if __name__ == "__main__":  # pragma: no cover
    raise SystemExit(main())
python get_customer_hosted_token.py \
--client-id "<client_id>" \
--client-secret "<client_secret>" \
--tenant-id "<tenant_id>" \
--scope "api://<client_id>/.default" \
--json
| Parameter | Description |
|---|---|
| --client-id | Application (client) ID from Azure App Registration. |
| --client-secret | Client secret created earlier. |
| --tenant-id | Azure AD tenant ID. |
| --scope | API scope. This must match the audience configured in Helm. |
| --json | Output token in JSON format. |
curl --location 'https://login.microsoftonline.com/<TENANT_ID>/oauth2/v2.0/token' \
--header 'Content-Type: application/x-www-form-urlencoded' \
--data-urlencode 'client_id=<CLIENT_ID>' \
--data-urlencode 'client_secret=<CLIENT_SECRET>' \
--data-urlencode 'scope=<AUDIENCE>/.default' \
--data-urlencode 'grant_type=client_credentials'
curl --location 'http://<SERVICE_IP>/cb-ai-service/requirement-evaluation/v1' --header 'Accept-Language: string' --header 'Content-Type: application/json' --header 'Accept: application/json' --header 'Authorization: Bearer <TOKEN>' --data '{
"requirement": {
"summary": "Advanced Navigation",
"description": {
"value": "Die Batteriesoll dem Bordcomputer dem Autos eine Spannung von fast 28 VDC liefern.",
"valueType": "TEXT"
}
},
"standards": [
"INCOSE_RULES_4_0"
],
"ruleFilters": {
"INCOSE_RULES_4_0": {
"en": "R34,R18-R34,R12",
"de": "R7,R33,R27"
}
}
}'
|
|
For new deployments, it is recommended to start with a Pay-As-You-Go SKU (DataZoneStandard or GlobalStandard) and switch to PTU in a subsequent Terraform apply once the deployment is stable.
PTU model provisioning is slower than Pay-As-You-Go provisioning. When Terraform creates the Cognitive Account, private endpoint, private DNS zone, and PTU deployment in a single apply, the PTU deployment can take 5-20+ minutes to finish provisioning on Azure even after Terraform reports success. During this time, the private endpoint DNS for the PTU deployment has not yet fully propagated, causing API calls from cb-ai-service to fail.
Error in cb-ai-service pod logs or curl:
{"event": "Error code: 403 - {'error': {'code': '403', 'message': 'Traffic is not from an approved private endpoint.'}}", "dd.trace_id": "ecfb4368512eba7e0b2a0aa20caf24a3", "dd.span_id": "563883118223141984", "timestamp": "2026-04-06 04:16:45", "level": "error", "logger": "cbai.cb_ai_service.exception_handlers"}
This is a timing issue. The PTU deployment's private link integration is still propagating while requests are sent. Most of the time the error resolves itself after 45-60 minutes, depending on Azure provisioning time.
|