选取文件
更新时间:2022-07-13
SelectObject接口支持用户对BOS中指定格式(CSV/JSON)的object内容执行SQL语句,通过SQL这种结构化查询语言对object内容进行筛选、分析、过滤之后再返回用户需要的文件内容。
使用限制见“开发者指南”-“使用及管理数据”-“选取object”部分。
查询csv文件
示例:
from baidubce import compat
import base64
# Example: run a SQL query against a CSV object with select_object().
# `bos_client`, `bucket_name` and `key` are expected to be defined by the
# surrounding setup code; they are not created in this snippet.

# For illustration, first upload a small CSV file.
# NOTE(review): the literal starts with a newline, so the uploaded object
# begins with an empty line -- confirm that this is intended.
csv_content = """
1,Maurits,2017-09-12 16:32:57,685856330,-540265154.48,true
2,Iago,2018-02-01 12:25:01,-642946677,3781354659.89,false
3,Dionisio,2018-02-16 09:52:24,-3823711977,79336720.77,false
4,Aleen,2018-05-17 11:48:45,-3289131518,1499686289.41,false
5,Herschel,2019-06-04 02:28:37,3456163349,-3810272511.88,true
"""
bos_client.put_object_from_string(bucket_name, key, csv_content)

# Build the arguments for select_object().
# The delimiter/quote/comment values are base64-encoded characters:
#   "Cg==" -> "\n",  "LA==" -> ",",  "Ig==" -> '"',  "Iw==" -> "#"
select_object_args = {
    "expressionType": "SQL",
    "inputSerialization": {
        "compressionType": "NONE",
        "csv": {
            "fileHeaderInfo": "NONE",
            "recordDelimiter": "Cg==",
            "fieldDelimiter": "LA==",
            "quoteCharacter": "Ig==",
            "commentCharacter": "Iw==",
        },
    },
    "outputSerialization": {
        "outputHeader": False,
        "csv": {
            "quoteFields": "ALWAYS",
            "recordDelimiter": "Cg==",
            "fieldDelimiter": "LA==",
            "quoteCharacter": "Ig==",
        },
    },
    "requestProgress": {
        "enabled": True,
    },
}

# The SQL statement must be sent base64-encoded.
sql_exp = "SELECT _1, _2, _6 FROM BosObject"
select_object_args["expression"] = compat.convert_to_string(
    base64.standard_b64encode(compat.convert_to_bytes(sql_exp))
)

# Call the select object interface.
select_response = bos_client.select_object(bucket_name, key, select_object_args)

# result() yields the response messages one by one.
result = select_response.result()
for msg in result:
    print(msg)
    # Which fields are printed depends on the message type in the headers.
    if msg.headers["message-type"] == "Records":
        print("type: {}, headers: {}, payload: {}, crc: {}".format(
            msg.type, msg.headers, msg.payload, msg.crc))
    elif msg.headers["message-type"] == "Cont":
        print("type: {}, headers: {}, bytes_scanned: {}, bytes_returned: {}, crc: {}".format(
            msg.type, msg.headers, msg.bytes_scanned, msg.bytes_returned, msg.crc))
    else:
        print("type: {}, headers: {}, crc: {}".format(msg.type, msg.headers, msg.crc))
查询json文件
示例:
from baidubce import compat
import base64
# Example: run a SQL query against a JSON object with select_object().
# `bos_client`, `bucket_name` and `key` are expected to be defined by the
# surrounding setup code; they are not created in this snippet.

# For illustration, first upload a small JSON file.
json_content = """
{
"name": "Smith",
"age": 16,
"weight": 65.5,
"org": null,
"projects":
[
{"project_name":"project1", "completed":false},
{"project_name":"project2", "completed":true}
]
}
"""
bos_client.put_object_from_string(bucket_name, key, json_content)

# Build the arguments for select_object().
# "Cg==" is the base64 encoding of "\n", used as the output record delimiter.
select_object_args = {
    "expressionType": "SQL",
    "inputSerialization": {
        "compressionType": "NONE",
        "json": {
            "type": "DOCUMENT",
        },
    },
    "outputSerialization": {
        "json": {
            "recordDelimiter": "Cg==",
        },
    },
    "requestProgress": {
        "enabled": True,
    },
}

# The SQL statement must be sent base64-encoded.
sql_exp = "select projects from BosObject where name='Smith'"
select_object_args["expression"] = compat.convert_to_string(
    base64.standard_b64encode(compat.convert_to_bytes(sql_exp))
)

# Call the select object interface.
select_response = bos_client.select_object(bucket_name, key, select_object_args)

# result() yields the response messages one by one.
result = select_response.result()
for msg in result:
    print(msg)
    # Which fields are printed depends on the message type in the headers.
    if msg.headers["message-type"] == "Records":
        print("type: {}, headers: {}, payload: {}, crc: {}".format(
            msg.type, msg.headers, msg.payload, msg.crc))
    elif msg.headers["message-type"] == "Cont":
        print("type: {}, headers: {}, bytes_scanned: {}, bytes_returned: {}, crc: {}".format(
            msg.type, msg.headers, msg.bytes_scanned, msg.bytes_returned, msg.crc))
    else:
        print("type: {}, headers: {}, crc: {}".format(msg.type, msg.headers, msg.crc))