Сглаживание (flatten) массива JSON в Python
Я работаю с JSON, который приходит ко мне в такой структуре:
{
"time":"2015-10-20T20:15:00.847Z",
"name":"meta.response.ean",
"level":"info",
"data1":{
"HotelListResponse":{
"customerSessionId":"0AB29024-F6D4-3915-0862-DB3FD1904C5A",
"numberOfRoomsRequested":1,
"moreResultsAvailable":true,
"cacheKey":"-705f6d43:15086db3fd1:-4c58",
"cacheLocation":"10.178.144.36:7300",
"HotelList":{
"@size":2,
"@activePropertyCount":2,
"HotelSummary":[
{
"hotelId":132684,
"city":"Seattle",
"highRate":159.0,
"lowRate":159.0,
"rateCurrencyCode":"USD",
"RoomRateDetailsList":{
"RoomRateDetails":{
"roomTypeCode":10351,
"rateCode":10351,
"roomDescription":"Standard Room, 1 Queen Bed",
"RateInfos":{
"RateInfo":{
"@promo":false,
"ChargeableRateInfo":{
"@averageBaseRate":159.0,
"@averageRate":159.0,
"@currencyCode":"USD",
"@nightlyRateTotal":159.0,
"@surchargeTotal":26.81,
"@total":185.81
}
}
}
}
}
},
{
"hotelId":263664,
"city":"Las Vegas",
"highRate":135.0,
"lowRate":94.5,
"rateCurrencyCode":"USD",
"RoomRateDetailsList":{
"RoomRateDetails":{
"roomTypeCode":373685,
"rateCode":1238953,
"roomDescription":"Standard Room, 1 King Bed",
"RateInfos":{
"RateInfo":{
"@promo":true,
"ChargeableRateInfo":{
"@averageBaseRate":135.0,
"@averageRate":94.5,
"@currencyCode":"USD",
"@nightlyRateTotal":94.5,
"@surchargeTotal":9.45,
"@total":103.95
}
}
}
}
}
}
]
}
}
},
"context":{
"X-Request-Id":"dca47992-b6cc-4b87-956c-90523c0bf3bb",
"host":"getaways-search-app2",
"thread":"http-nio-80-exec-12"
}
}
Как видите, это вложенные массивы. Существует много дискуссий по поводу их рекурсивного сглаживания. Я не могу сгладить массивы в "HotelSummary". Есть идеи?
1 ответ
Используйте pandas и json_normalize:
- Параметр record_path задаёт основной ключ (путь к списку записей), который нужно развернуть.
- Параметр meta задаёт дополнительные ключи, значения которых добавляются к каждой записи.
- json_normalize создаёт имена столбцов, включающие все ключи на пути к нужному ключу, отсюда длинные имена столбцов (например, RoomRateDetailsList.RoomRateDetails.roomTypeCode).
  - Длинные имена столбцов нужно переименовать в более короткие.
  - Словарь rename для переименования создаётся с помощью генератора словаря (dict comprehension).
- Следующий код использует библиотеку pathlib.
  - .open — это метод объекта Path из pathlib.
  - Работает и с путями, отличными от Windows.
import pandas as pd
import json
from pathlib import Path
# Location of the source JSON document (raw string keeps the backslashes literal).
p = Path(r'c:\some_path_to_file\test.json')

# Parse the document straight from the open file handle.
with p.open('r', encoding='utf-8') as f:
    data = json.load(f)

# Each element of the HotelSummary list becomes one row; the response-level
# and list-level scalars named in `meta` are attached to every row.
response_path = ['data1', 'HotelListResponse']
hotel_list_path = response_path + ['HotelList']
response_fields = ['customerSessionId', 'numberOfRoomsRequested',
                   'moreResultsAvailable', 'cacheKey', 'cacheLocation']
hotel_list_fields = ['@size', '@activePropertyCount']

df = pd.json_normalize(
    data,
    record_path=hotel_list_path + ['HotelSummary'],
    meta=[response_path + [field] for field in response_fields]
    + [hotel_list_path + [field] for field in hotel_list_fields],
)

# Shorten every column to the last dotted segment of its name and drop '@'.
rename = {column: column.split('.')[-1].replace('@', '') for column in df.columns}
df = df.rename(columns=rename)
# dataframe view
hotelId city highRate lowRate rateCurrencyCode roomTypeCode rateCode roomDescription promo averageBaseRate averageRate currencyCode nightlyRateTotal surchargeTotal total customerSessionId numberOfRoomsRequested moreResultsAvailable cacheKey cacheLocation size activePropertyCount
132684 Seattle 159.0 159.0 USD 10351 10351 Standard Room, 1 Queen Bed False 159.0 159.0 USD 159.0 26.81 185.81 0AB29024-F6D4-3915-0862-DB3FD1904C5A 1 True -705f6d43:15086db3fd1:-4c58 10.178.144.36:7300 2 2
263664 Las Vegas 135.0 94.5 USD 373685 1238953 Standard Room, 1 King Bed True 135.0 94.5 USD 94.5 9.45 103.95 0AB29024-F6D4-3915-0862-DB3FD1904C5A 1 True -705f6d43:15086db3fd1:-4c58 10.178.144.36:7300 2 2
# Write the flattened rows to out.json as a JSON array, one object per row.
df.to_json(path_or_buf='out.json', orient='records')
Окончательный вывод JSON:
[{
"hotelId": 132684,
"city": "Seattle",
"highRate": 159.0,
"lowRate": 159.0,
"rateCurrencyCode": "USD",
"roomTypeCode": 10351,
"rateCode": 10351,
"roomDescription": "Standard Room, 1 Queen Bed",
"promo": false,
"averageBaseRate": 159.0,
"averageRate": 159.0,
"currencyCode": "USD",
"nightlyRateTotal": 159.0,
"surchargeTotal": 26.81,
"total": 185.81,
"customerSessionId": "0AB29024-F6D4-3915-0862-DB3FD1904C5A",
"numberOfRoomsRequested": 1,
"moreResultsAvailable": true,
"cacheKey": "-705f6d43:15086db3fd1:-4c58",
"cacheLocation": "10.178.144.36:7300",
"size": 2,
"activePropertyCount": 2
}, {
"hotelId": 263664,
"city": "Las Vegas",
"highRate": 135.0,
"lowRate": 94.5,
"rateCurrencyCode": "USD",
"roomTypeCode": 373685,
"rateCode": 1238953,
"roomDescription": "Standard Room, 1 King Bed",
"promo": true,
"averageBaseRate": 135.0,
"averageRate": 94.5,
"currencyCode": "USD",
"nightlyRateTotal": 94.5,
"surchargeTotal": 9.45,
"total": 103.95,
"customerSessionId": "0AB29024-F6D4-3915-0862-DB3FD1904C5A",
"numberOfRoomsRequested": 1,
"moreResultsAvailable": true,
"cacheKey": "-705f6d43:15086db3fd1:-4c58",
"cacheLocation": "10.178.144.36:7300",
"size": 2,
"activePropertyCount": 2
}
]
{
"HotelListResponse":{
"customerSessionId":"0AB29024-F6D4-3915-0862-DB3FD1904C5A",
"numberOfRoomsRequested":1,
"moreResultsAvailable":"true",
"cacheKey":"-705f6d43:15086db3fd1:-4c58",
"cacheLocation":"10.178.144.36:7300",
"HotelList":{
"size":2,
"activePropertyCount":2,
"HotelSummary":[
{
"hotelId":132684,
"city":"Seattle",
"highRate":159.0,
"lowRate":159.0,
"rateCurrencyCode":"USD",
"RoomRateDetailsList":{
"RoomRateDetails":{
"roomTypeCode":10351,
"rateCode":10351,
"roomDescription":"Standard Room, 1 Queen Bed",
"RateInfos":{
"RateInfo":{
"promo":"false",
"ChargeableRateInfo":{
"averageBaseRate":159.0,
"averageRate":159.0,
"currencyCode":"USD",
"nightlyRateTotal":159.0,
"surchargeTotal":26.81,
"total":185.81
}
}
}
}
}
},
{
"hotelId":263664,
"city":"Las Vegas",
"highRate":135.0,
"lowRate":94.5,
"rateCurrencyCode":"USD",
"RoomRateDetailsList":{
"RoomRateDetails":{
"roomTypeCode":373685,
"rateCode":1238953,
"roomDescription":"Standard Room, 1 King Bed",
"RateInfos":{
"RateInfo":{
"promo":"true",
"ChargeableRateInfo":{
"averageBaseRate":135.0,
"averageRate":94.5,
"currencyCode":"USD",
"nightlyRateTotal":94.5,
"surchargeTotal":9.45,
"total":103.95
}
}
}
}
}
}
]
}
}
}
Я хочу привести данные к плоскому виду, как показано ниже:
{
"customerSessionId":"0AB29024-F6D4-3915-0862-DB3FD1904C5A",
"numberOfRoomsRequested":1,
"moreResultsAvailable":"true",
"cacheKey":"-705f6d43:15086db3fd1:-4c58",
"cacheLocation":"10.178.144.36:7300",
"size":2,
"activePropertyCount":2,
"hotelId":132684,
"city":"Seattle",
"highRate":159.0,
"lowRate":159.0,
"rateCurrencyCode":"USD",
"roomTypeCode":10351,
"rateCode":10351,
"roomDescription":"Standard Room, 1 Queen Bed",
"promo":"false",
"averageBaseRate":159.0,
"averageRate":159.0,
"currencyCode":"USD",
"nightlyRateTotal":159.0,
"surchargeTotal":26.81,
"total":185.81
}
{
"customerSessionId":"0AB29024-F6D4-3915-0862-DB3FD1904C5A",
"numberOfRoomsRequested":1,
"moreResultsAvailable":"true",
"cacheKey":"-705f6d43:15086db3fd1:-4c58",
"cacheLocation":"10.178.144.36:7300",
"size":2,
"activePropertyCount":2,
"hotelId":263664,
"city":"Las Vegas",
"highRate":135.0,
"lowRate":94.5,
"rateCurrencyCode":"USD",
"roomTypeCode":373685,
"rateCode":1238953,
"roomDescription":"Standard Room, 1 King Bed",
"promo":"true",
"averageBaseRate":135.0,
"averageRate":94.5,
"currencyCode":"USD",
"nightlyRateTotal":94.5,
"surchargeTotal":9.45,
"total":103.95
}
Я попытался сделать это с помощью функции flattenDict, но почему-то не получаю вывод в нужном формате. Ниже мой код:
def _flatten_into(value, prefix, result):
    """Store every leaf found under *value* in *result*, keyed by its dotted path."""
    if isinstance(value, dict):
        children = value.items()
    elif isinstance(value, (list, tuple)):
        # The element position becomes a path segment so that sibling
        # elements never overwrite each other in the flat result.
        children = ((str(position), item) for position, item in enumerate(value))
    else:
        result[prefix] = value
        return
    for name, child in children:
        _flatten_into(child, ".".join([prefix, name]), result)


def flattenDict(d, result=None):
    """Flatten a nested mapping into a single-level dict with dotted keys.

    Nested dict keys are joined with ".", and list/tuple elements contribute
    their position as a path segment, e.g. ``{"a": [{"b": 1}, {"b": 2}]}``
    becomes ``{"a.0.b": 1, "a.1.b": 2}``. Scalar elements of a list are kept
    (``{"t": ["x"]}`` becomes ``{"t.0": "x"}``). Empty dicts and empty lists
    have no leaves and therefore produce no entries.

    Args:
        d: The mapping to flatten. Keys of nested dicts must be strings,
            because they are joined into the dotted path.
        result: Optional dict to add the flattened entries to. A new dict is
            created when it is omitted.

    Returns:
        The dict holding the flattened entries (``result`` itself when one
        was passed in).

    Raises:
        TypeError: If a key that has to be joined into a path is not a string.
    """
    if result is None:
        result = {}
    for key, value in d.items():
        _flatten_into(value, key, result)
    return result