数据集相关
更新时间:2025-11-26
获取数据集列表
使用以下代码可以获取数据集列表。
Python
"""Example: list AIHC datasets with the Baidu BCE Python SDK."""
import logging
import os

import dotenv

from baidubce.bce_client_configuration import BceClientConfiguration
from baidubce.auth.bce_credentials import BceCredentials
from baidubce.services.aihc.aihc_client import AihcClient
from baidubce.exception import BceHttpClientError, BceServerError

# Load environment variables from a local .env file (optional, recommended).
dotenv.load_dotenv()

# force=True replaces any handlers already attached to the root logger.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', force=True)
logging.getLogger().setLevel(logging.INFO)
logging.getLogger("baidubce").setLevel(logging.INFO)
__logger = logging.getLogger(__name__)
__logger.setLevel(logging.INFO)

# Credentials and service endpoint; each value falls back to the literal
# below when the environment variable is unset or empty.
HOST = os.getenv('HOST') or 'https://aihc.bj.baidubce.com'  # AIHC service endpoint
AK = os.getenv('AK') or 'your-access-key-id'  # your Access Key ID
SK = os.getenv('SK') or 'your-secret-access-key'  # your Secret Access Key

# Build the BCE client configuration.
config = BceClientConfiguration(credentials=BceCredentials(AK, SK), endpoint=HOST)

# Create the AIHC client.
aihc_client = AihcClient(config)

# Query the dataset list.
try:
    __logger.info('--------------------DescribeDatasets start--------------------')
    response = aihc_client.dataset.DescribeDatasets()
    print(response)
except BceHttpClientError as e:
    # A BceServerError as the last error exposes status_code and code.
    if isinstance(e.last_error, BceServerError):
        __logger.error('send request failed. Response %s, code: %s, msg: %s',
                       e.last_error.status_code, e.last_error.code, e.last_error)
    else:
        __logger.error('send request failed. Unknown exception: %s', e)
注意:请根据接口文档填写具体的请求参数,接口文档参见「获取数据集列表」。
获取数据集详情
使用以下代码可以获取数据集详情。
Python
"""Example: fetch the details of one AIHC dataset with the Baidu BCE Python SDK."""
import logging
import os

import dotenv

from baidubce.bce_client_configuration import BceClientConfiguration
from baidubce.auth.bce_credentials import BceCredentials
from baidubce.services.aihc.aihc_client import AihcClient
from baidubce.exception import BceHttpClientError, BceServerError

# Load environment variables from a local .env file.
dotenv.load_dotenv()

# force=True replaces any handlers already attached to the root logger.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', force=True)
logging.getLogger().setLevel(logging.INFO)
logging.getLogger("baidubce").setLevel(logging.INFO)
__logger = logging.getLogger(__name__)
__logger.setLevel(logging.INFO)

# Credentials and service endpoint; each value falls back to the literal
# below when the environment variable is unset or empty.
HOST = os.getenv('HOST') or 'https://aihc.bj.baidubce.com'
AK = os.getenv('AK') or 'your-access-key-id'
SK = os.getenv('SK') or 'your-secret-access-key'

config = BceClientConfiguration(credentials=BceCredentials(AK, SK), endpoint=HOST)
aihc_client = AihcClient(config)

# Query the dataset details.
try:
    __logger.info('--------------------DescribeDataset start--------------------')
    dataset_id = "d-xxx"  # Replace with the actual dataset ID.
    response = aihc_client.dataset.DescribeDataset(datasetId=dataset_id)
    print(response)
except BceHttpClientError as e:
    # A BceServerError as the last error exposes status_code and code.
    if isinstance(e.last_error, BceServerError):
        __logger.error('send request failed. Response %s, code: %s, msg: %s',
                       e.last_error.status_code, e.last_error.code, e.last_error)
    else:
        __logger.error('send request failed. Unknown exception: %s', e)
注意:请根据接口文档填写具体的请求参数,接口文档参见「获取数据集详情」。
获取数据集版本列表
使用以下代码可以获取数据集版本列表。
Python
"""Example: list the versions of an AIHC dataset with the Baidu BCE Python SDK."""
import logging
import os

import dotenv

from baidubce.bce_client_configuration import BceClientConfiguration
from baidubce.auth.bce_credentials import BceCredentials
from baidubce.services.aihc.aihc_client import AihcClient
from baidubce.exception import BceHttpClientError, BceServerError

# Load environment variables from a local .env file.
dotenv.load_dotenv()

# force=True replaces any handlers already attached to the root logger.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', force=True)
logging.getLogger().setLevel(logging.INFO)
logging.getLogger("baidubce").setLevel(logging.INFO)
__logger = logging.getLogger(__name__)
__logger.setLevel(logging.INFO)

# Credentials and service endpoint; each value falls back to the literal
# below when the environment variable is unset or empty.
HOST = os.getenv('HOST') or 'https://aihc.bj.baidubce.com'
AK = os.getenv('AK') or 'your-access-key-id'
SK = os.getenv('SK') or 'your-secret-access-key'

config = BceClientConfiguration(credentials=BceCredentials(AK, SK), endpoint=HOST)
aihc_client = AihcClient(config)

# Query the dataset version list.
try:
    __logger.info('--------------------DescribeDatasetVersions start--------------------')
    dataset_id = "d-xxx"  # Replace with the actual dataset ID.
    response = aihc_client.dataset.DescribeDatasetVersions(datasetId=dataset_id)
    print(response)
except BceHttpClientError as e:
    # A BceServerError as the last error exposes status_code and code.
    if isinstance(e.last_error, BceServerError):
        __logger.error('send request failed. Response %s, code: %s, msg: %s',
                       e.last_error.status_code, e.last_error.code, e.last_error)
    else:
        __logger.error('send request failed. Unknown exception: %s', e)
注意:请根据接口文档填写具体的请求参数,接口文档参见「获取数据集版本列表」。
获取数据集版本详情
使用以下代码可以获取数据集版本详情。
Python
"""Example: fetch one AIHC dataset version with the Baidu BCE Python SDK."""
import logging
import os

import dotenv

from baidubce.bce_client_configuration import BceClientConfiguration
from baidubce.auth.bce_credentials import BceCredentials
from baidubce.services.aihc.aihc_client import AihcClient
from baidubce.exception import BceHttpClientError, BceServerError

# Load environment variables from a local .env file.
dotenv.load_dotenv()

# force=True replaces any handlers already attached to the root logger.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', force=True)
logging.getLogger().setLevel(logging.INFO)
logging.getLogger("baidubce").setLevel(logging.INFO)
__logger = logging.getLogger(__name__)
__logger.setLevel(logging.INFO)

# Credentials and service endpoint; each value falls back to the literal
# below when the environment variable is unset or empty.
HOST = os.getenv('HOST') or 'https://aihc.bj.baidubce.com'
AK = os.getenv('AK') or 'your-access-key-id'
SK = os.getenv('SK') or 'your-secret-access-key'

config = BceClientConfiguration(credentials=BceCredentials(AK, SK), endpoint=HOST)
aihc_client = AihcClient(config)

# Query the dataset version details.
try:
    __logger.info('--------------------DescribeDatasetVersion start--------------------')
    dataset_id = "d-xxx"  # Replace with the actual dataset ID.
    version_id = "v-xxx"  # Replace with the actual dataset version ID.
    response = aihc_client.dataset.DescribeDatasetVersion(datasetId=dataset_id, versionId=version_id)
    print(response)
except BceHttpClientError as e:
    # A BceServerError as the last error exposes status_code and code.
    if isinstance(e.last_error, BceServerError):
        __logger.error('send request failed. Response %s, code: %s, msg: %s',
                       e.last_error.status_code, e.last_error.code, e.last_error)
    else:
        __logger.error('send request failed. Unknown exception: %s', e)
注意:请根据接口文档填写具体的请求参数,接口文档参见「获取数据集版本详情」。
修改数据集
使用以下代码可以修改数据集。
Python
"""Example: modify an AIHC dataset with the Baidu BCE Python SDK."""
import logging
import os

import dotenv

from baidubce.bce_client_configuration import BceClientConfiguration
from baidubce.auth.bce_credentials import BceCredentials
from baidubce.services.aihc.aihc_client import AihcClient
from baidubce.exception import BceHttpClientError, BceServerError

# Load environment variables from a local .env file.
dotenv.load_dotenv()

# force=True replaces any handlers already attached to the root logger.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', force=True)
logging.getLogger().setLevel(logging.INFO)
logging.getLogger("baidubce").setLevel(logging.INFO)
__logger = logging.getLogger(__name__)
__logger.setLevel(logging.INFO)

# Credentials and service endpoint; each value falls back to the literal
# below when the environment variable is unset or empty.
HOST = os.getenv('HOST') or 'https://aihc.bj.baidubce.com'
AK = os.getenv('AK') or 'your-access-key-id'
SK = os.getenv('SK') or 'your-secret-access-key'

config = BceClientConfiguration(credentials=BceCredentials(AK, SK), endpoint=HOST)
aihc_client = AihcClient(config)

# Modify the dataset.
try:
    __logger.info('--------------------ModifyDataset start--------------------')
    dataset_id = "d-xxx"  # Replace with the actual dataset ID.
    response = aihc_client.dataset.ModifyDataset(datasetId=dataset_id, name='test-dataset-xxx')
    print(response)
except BceHttpClientError as e:
    # A BceServerError as the last error exposes status_code and code.
    if isinstance(e.last_error, BceServerError):
        __logger.error('send request failed. Response %s, code: %s, msg: %s',
                       e.last_error.status_code, e.last_error.code, e.last_error)
    else:
        __logger.error('send request failed. Unknown exception: %s', e)
注意:请根据接口文档填写具体的请求参数,接口文档参见「修改数据集」。
删除数据集
使用以下代码可以删除数据集。
Python
"""Example: delete an AIHC dataset with the Baidu BCE Python SDK."""
import logging
import os

import dotenv

from baidubce.bce_client_configuration import BceClientConfiguration
from baidubce.auth.bce_credentials import BceCredentials
from baidubce.services.aihc.aihc_client import AihcClient
from baidubce.exception import BceHttpClientError, BceServerError

# Load environment variables from a local .env file.
dotenv.load_dotenv()

# force=True replaces any handlers already attached to the root logger.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', force=True)
logging.getLogger().setLevel(logging.INFO)
logging.getLogger("baidubce").setLevel(logging.INFO)
__logger = logging.getLogger(__name__)
__logger.setLevel(logging.INFO)

# Credentials and service endpoint; each value falls back to the literal
# below when the environment variable is unset or empty.
HOST = os.getenv('HOST') or 'https://aihc.bj.baidubce.com'
AK = os.getenv('AK') or 'your-access-key-id'
SK = os.getenv('SK') or 'your-secret-access-key'

config = BceClientConfiguration(credentials=BceCredentials(AK, SK), endpoint=HOST)
aihc_client = AihcClient(config)

# Delete the dataset.
try:
    __logger.info('--------------------DeleteDataset start--------------------')
    dataset_id = "d-xxx"  # Replace with the actual dataset ID.
    response = aihc_client.dataset.DeleteDataset(datasetId=dataset_id)
    print(response)
except BceHttpClientError as e:
    # A BceServerError as the last error exposes status_code and code.
    if isinstance(e.last_error, BceServerError):
        __logger.error('send request failed. Response %s, code: %s, msg: %s',
                       e.last_error.status_code, e.last_error.code, e.last_error)
    else:
        __logger.error('send request failed. Unknown exception: %s', e)
注意:请根据接口文档填写具体的请求参数,接口文档参见「删除数据集」。
创建数据集
使用以下代码可以创建数据集。
Python
"""Example: create an AIHC dataset with the Baidu BCE Python SDK."""
import logging
import os

import dotenv

from baidubce.bce_client_configuration import BceClientConfiguration
from baidubce.auth.bce_credentials import BceCredentials
from baidubce.services.aihc.aihc_client import AihcClient
from baidubce.services.aihc.modules.dataset.dataset_model import DatasetVersionEntry
from baidubce.exception import BceHttpClientError, BceServerError

# Load environment variables from a local .env file.
dotenv.load_dotenv()

# force=True replaces any handlers already attached to the root logger.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', force=True)
logging.getLogger().setLevel(logging.INFO)
logging.getLogger("baidubce").setLevel(logging.INFO)
__logger = logging.getLogger(__name__)
__logger.setLevel(logging.INFO)

# Credentials and service endpoint; each value falls back to the literal
# below when the environment variable is unset or empty.
HOST = os.getenv('HOST') or 'https://aihc.bj.baidubce.com'
AK = os.getenv('AK') or 'your-access-key-id'
SK = os.getenv('SK') or 'your-secret-access-key'

config = BceClientConfiguration(credentials=BceCredentials(AK, SK), endpoint=HOST)
aihc_client = AihcClient(config)

# Create the dataset; the 'xxx' values are placeholders to be replaced.
try:
    __logger.info('--------------------CreateDataset start--------------------')
    response = aihc_client.dataset.CreateDataset(
        name='sdk-test-dataset-xxx-xxx',
        storageType='PFS',
        storageInstance='pfs-xxx',
        importFormat='FOLDER',
        visibilityScope='ALL_PEOPLE',
        initVersionEntry=DatasetVersionEntry(
            storagePath='/',
            mountPath='/xx'
        )
    )
    print(response)
except BceHttpClientError as e:
    # A BceServerError as the last error exposes status_code and code.
    if isinstance(e.last_error, BceServerError):
        __logger.error('send request failed. Response %s, code: %s, msg: %s',
                       e.last_error.status_code, e.last_error.code, e.last_error)
    else:
        __logger.error('send request failed. Unknown exception: %s', e)
注意:请根据接口文档填写具体的请求参数,接口文档参见「创建数据集」。
创建数据集版本
使用以下代码可以创建数据集版本。
Python
"""Example: create a version of an AIHC dataset with the Baidu BCE Python SDK."""
import logging
import os

import dotenv

from baidubce.bce_client_configuration import BceClientConfiguration
from baidubce.auth.bce_credentials import BceCredentials
from baidubce.services.aihc.aihc_client import AihcClient
from baidubce.exception import BceHttpClientError, BceServerError

# Load environment variables from a local .env file.
dotenv.load_dotenv()

# force=True replaces any handlers already attached to the root logger.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', force=True)
logging.getLogger().setLevel(logging.INFO)
logging.getLogger("baidubce").setLevel(logging.INFO)
__logger = logging.getLogger(__name__)
__logger.setLevel(logging.INFO)

# Credentials and service endpoint; each value falls back to the literal
# below when the environment variable is unset or empty.
HOST = os.getenv('HOST') or 'https://aihc.bj.baidubce.com'
AK = os.getenv('AK') or 'your-access-key-id'
SK = os.getenv('SK') or 'your-secret-access-key'

config = BceClientConfiguration(credentials=BceCredentials(AK, SK), endpoint=HOST)
aihc_client = AihcClient(config)

# Create the dataset version; the 'xxx' values are placeholders to be replaced.
try:
    __logger.info('--------------------CreateDatasetVersion start--------------------')
    dataset_id = "d-xxx"  # Replace with the actual dataset ID.
    response = aihc_client.dataset.CreateDatasetVersion(
        datasetId=dataset_id, storagePath='/xxx-xxx',
        mountPath='/xxx/xxx', description='Test dataset version'
    )
    print(response)
except BceHttpClientError as e:
    # A BceServerError as the last error exposes status_code and code.
    if isinstance(e.last_error, BceServerError):
        __logger.error('send request failed. Response %s, code: %s, msg: %s',
                       e.last_error.status_code, e.last_error.code, e.last_error)
    else:
        __logger.error('send request failed. Unknown exception: %s', e)
注意:请根据接口文档填写具体的请求参数,接口文档参见「创建数据集版本」。
删除数据集版本
使用以下代码可以删除数据集版本。
Python
"""Example: delete one AIHC dataset version with the Baidu BCE Python SDK."""
import logging
import os

import dotenv

from baidubce.bce_client_configuration import BceClientConfiguration
from baidubce.auth.bce_credentials import BceCredentials
from baidubce.services.aihc.aihc_client import AihcClient
from baidubce.exception import BceHttpClientError, BceServerError

# Load environment variables from a local .env file.
dotenv.load_dotenv()

# force=True replaces any handlers already attached to the root logger.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', force=True)
logging.getLogger().setLevel(logging.INFO)
logging.getLogger("baidubce").setLevel(logging.INFO)
__logger = logging.getLogger(__name__)
__logger.setLevel(logging.INFO)

# Credentials and service endpoint; each value falls back to the literal
# below when the environment variable is unset or empty.
HOST = os.getenv('HOST') or 'https://aihc.bj.baidubce.com'
AK = os.getenv('AK') or 'your-access-key-id'
SK = os.getenv('SK') or 'your-secret-access-key'

config = BceClientConfiguration(credentials=BceCredentials(AK, SK), endpoint=HOST)
aihc_client = AihcClient(config)

# Delete the dataset version.
try:
    __logger.info('--------------------DeleteDatasetVersion start--------------------')
    dataset_id = "d-xxx"  # Replace with the actual dataset ID.
    version_id = "v-xxx"  # Replace with the actual dataset version ID.
    response = aihc_client.dataset.DeleteDatasetVersion(datasetId=dataset_id, versionId=version_id)
    print(response)
except BceHttpClientError as e:
    # A BceServerError as the last error exposes status_code and code.
    if isinstance(e.last_error, BceServerError):
        __logger.error('send request failed. Response %s, code: %s, msg: %s',
                       e.last_error.status_code, e.last_error.code, e.last_error)
    else:
        __logger.error('send request failed. Unknown exception: %s', e)
注意:请根据接口文档填写具体的请求参数,接口文档参见「删除数据集版本」。
