Как сгладить (flatten) данные JSON-массива в Python

Я имею дело со структурой JSON, которая выводится мне в таких структурах:

{  
  "time":"2015-10-20T20:15:00.847Z",
  "name":"meta.response.ean",
  "level":"info",
  "data1":{  
     "HotelListResponse":{  
        "customerSessionId":"0AB29024-F6D4-3915-0862-DB3FD1904C5A",
        "numberOfRoomsRequested":1,
        "moreResultsAvailable":true,
        "cacheKey":"-705f6d43:15086db3fd1:-4c58",
        "cacheLocation":"10.178.144.36:7300",
        "HotelList":{  
           "@size":2,
           "@activePropertyCount":2,
           "HotelSummary":[  
              {  
                 "hotelId":132684,
                 "city":"Seattle",
                 "highRate":159.0,
                 "lowRate":159.0,
                 "rateCurrencyCode":"USD",
                 "RoomRateDetailsList":{  
                    "RoomRateDetails":{  
                       "roomTypeCode":10351,
                       "rateCode":10351,
                       "roomDescription":"Standard Room, 1 Queen Bed",
                       "RateInfos":{  
                          "RateInfo":{  
                             "@promo":false,
                             "ChargeableRateInfo":{  
                                "@averageBaseRate":159.0,
                                "@averageRate":159.0,
                                "@currencyCode":"USD",
                                "@nightlyRateTotal":159.0,
                                "@surchargeTotal":26.81,
                                "@total":185.81
                             }
                          }
                       }
                    }
                 }
              },
              {  
                 "hotelId":263664,
                 "city":"Las Vegas",
                 "highRate":135.0,
                 "lowRate":94.5,
                 "rateCurrencyCode":"USD",
                 "RoomRateDetailsList":{  
                    "RoomRateDetails":{  
                       "roomTypeCode":373685,
                       "rateCode":1238953,
                       "roomDescription":"Standard Room, 1 King Bed",
                       "RateInfos":{  
                          "RateInfo":{  
                             "@promo":true,
                             "ChargeableRateInfo":{  
                                "@averageBaseRate":135.0,
                                "@averageRate":94.5,
                                "@currencyCode":"USD",
                                "@nightlyRateTotal":94.5,
                                "@surchargeTotal":9.45,
                                "@total":103.95
                             }
                          }
                       }
                    }
                 }
              }
           ]
        }
     }
  },
  "context":{  
     "X-Request-Id":"dca47992-b6cc-4b87-956c-90523c0bf3bb",
     "host":"getaways-search-app2",
     "thread":"http-nio-80-exec-12"
  }
}

Как видите, это вложенные массивы. Существует много дискуссий по поводу их рекурсивного сглаживания. Я не могу сгладить массивы в "HotelSummary". Есть идеи?

1 ответ

Используйте pandas и json_normalize:

  • Параметр record_path задаёт путь к основному ключу, под которым лежит список записей; каждая запись становится строкой таблицы
  • Параметр meta задаёт дополнительные ключи, значения которых добавляются к каждой строке
  • json_normalize формирует имена столбцов из всех ключей на пути к нужному значению, поэтому имена получаются длинными (например, RoomRateDetailsList.RoomRateDetails.roomTypeCode)
  • Длинные имена столбцов необходимо переименовать в более короткие
  • Словарь rename для переименования строится с помощью dict comprehension (генератора словаря)
  • Приведённый ниже код использует библиотеку pathlib
  • .open — это метод объекта pathlib.Path
  • Код работает и с путями не в формате Windows
import pandas as pd
import json
from pathlib import Path


# Location of the input JSON document
p = Path(r'c:\some_path_to_file\test.json')

# Parse the document directly from the open file handle
with p.open('r', encoding='utf-8') as f:
    data = json.load(f)

# Build the frame from the HotelSummary list; the response-level and
# list-level fields named in ``meta`` are added as extra columns
df = pd.json_normalize(
    data,
    record_path=['data1', 'HotelListResponse', 'HotelList', 'HotelSummary'],
    meta=(
        [['data1', 'HotelListResponse', field]
         for field in ('customerSessionId',
                       'numberOfRoomsRequested',
                       'moreResultsAvailable',
                       'cacheKey',
                       'cacheLocation')]
        + [['data1', 'HotelListResponse', 'HotelList', attr]
           for attr in ('@size', '@activePropertyCount')]
    ),
)

# Shorten the column names: keep only the last dotted segment and drop '@'
rename = {
    column: column.rsplit('.', 1)[-1].replace('@', '')
    for column in df.columns
}
df.rename(columns=rename, inplace=True)

# dataframe view
 hotelId       city  highRate  lowRate rateCurrencyCode  roomTypeCode  rateCode             roomDescription  promo  averageBaseRate  averageRate currencyCode  nightlyRateTotal  surchargeTotal   total                     customerSessionId numberOfRoomsRequested moreResultsAvailable                     cacheKey       cacheLocation size activePropertyCount
  132684    Seattle     159.0    159.0              USD         10351     10351  Standard Room, 1 Queen Bed  False            159.0        159.0          USD             159.0           26.81  185.81  0AB29024-F6D4-3915-0862-DB3FD1904C5A                      1                 True  -705f6d43:15086db3fd1:-4c58  10.178.144.36:7300    2                   2
  263664  Las Vegas     135.0     94.5              USD        373685   1238953   Standard Room, 1 King Bed   True            135.0         94.5          USD              94.5            9.45  103.95  0AB29024-F6D4-3915-0862-DB3FD1904C5A                      1                 True  -705f6d43:15086db3fd1:-4c58  10.178.144.36:7300    2                   2

# Write the frame to out.json as a JSON array with one object per row
df.to_json(path_or_buf='out.json', orient='records')

Окончательный вывод JSON:

[{
        "hotelId": 132684,
        "city": "Seattle",
        "highRate": 159.0,
        "lowRate": 159.0,
        "rateCurrencyCode": "USD",
        "roomTypeCode": 10351,
        "rateCode": 10351,
        "roomDescription": "Standard Room, 1 Queen Bed",
        "promo": false,
        "averageBaseRate": 159.0,
        "averageRate": 159.0,
        "currencyCode": "USD",
        "nightlyRateTotal": 159.0,
        "surchargeTotal": 26.81,
        "total": 185.81,
        "customerSessionId": "0AB29024-F6D4-3915-0862-DB3FD1904C5A",
        "numberOfRoomsRequested": 1,
        "moreResultsAvailable": true,
        "cacheKey": "-705f6d43:15086db3fd1:-4c58",
        "cacheLocation": "10.178.144.36:7300",
        "size": 2,
        "activePropertyCount": 2
    }, {
        "hotelId": 263664,
        "city": "Las Vegas",
        "highRate": 135.0,
        "lowRate": 94.5,
        "rateCurrencyCode": "USD",
        "roomTypeCode": 373685,
        "rateCode": 1238953,
        "roomDescription": "Standard Room, 1 King Bed",
        "promo": true,
        "averageBaseRate": 135.0,
        "averageRate": 94.5,
        "currencyCode": "USD",
        "nightlyRateTotal": 94.5,
        "surchargeTotal": 9.45,
        "total": 103.95,
        "customerSessionId": "0AB29024-F6D4-3915-0862-DB3FD1904C5A",
        "numberOfRoomsRequested": 1,
        "moreResultsAvailable": true,
        "cacheKey": "-705f6d43:15086db3fd1:-4c58",
        "cacheLocation": "10.178.144.36:7300",
        "size": 2,
        "activePropertyCount": 2
    }
]
{  
   "HotelListResponse":{  
      "customerSessionId":"0AB29024-F6D4-3915-0862-DB3FD1904C5A",
      "numberOfRoomsRequested":1,
      "moreResultsAvailable":"true",
      "cacheKey":"-705f6d43:15086db3fd1:-4c58",
      "cacheLocation":"10.178.144.36:7300",
      "HotelList":{  
         "size":2,
         "activePropertyCount":2,
         "HotelSummary":[  
            {  
               "hotelId":132684,
               "city":"Seattle",
               "highRate":159.0,
               "lowRate":159.0,
               "rateCurrencyCode":"USD",
               "RoomRateDetailsList":{  
                  "RoomRateDetails":{  
                     "roomTypeCode":10351,
                     "rateCode":10351,
                     "roomDescription":"Standard Room, 1 Queen Bed",
                     "RateInfos":{  
                        "RateInfo":{  
                           "promo":"false",
                           "ChargeableRateInfo":{  
                              "averageBaseRate":159.0,
                              "averageRate":159.0,
                              "currencyCode":"USD",
                              "nightlyRateTotal":159.0,
                              "surchargeTotal":26.81,
                              "total":185.81
                           }
                        }
                     }
                  }
               }
            },
            {  
               "hotelId":263664,
               "city":"Las Vegas",
               "highRate":135.0,
               "lowRate":94.5,
               "rateCurrencyCode":"USD",
               "RoomRateDetailsList":{  
                  "RoomRateDetails":{  
                     "roomTypeCode":373685,
                     "rateCode":1238953,
                     "roomDescription":"Standard Room, 1 King Bed",
                     "RateInfos":{  
                        "RateInfo":{  
                           "promo":"true",
                           "ChargeableRateInfo":{  
                              "averageBaseRate":135.0,
                              "averageRate":94.5,
                              "currencyCode":"USD",
                              "nightlyRateTotal":94.5,
                              "surchargeTotal":9.45,
                              "total":103.95
                           }
                        }
                     }
                  }
               }
            }
         ]
      }
   }
}

Я хочу привести данные к плоскому формату, показанному ниже:

{  
   "customerSessionId":"0AB29024-F6D4-3915-0862-DB3FD1904C5A",
   "numberOfRoomsRequested":1,
   "moreResultsAvailable":"true",
   "cacheKey":"-705f6d43:15086db3fd1:-4c58",
   "cacheLocation":"10.178.144.36:7300",
   "size":2,
   "activePropertyCount":2,
   "hotelId":132684,
   "city":"Seattle",
   "highRate":159.0,
   "lowRate":159.0,
   "rateCurrencyCode":"USD",
   "roomTypeCode":10351,
   "rateCode":10351,
   "roomDescription":"Standard Room, 1 Queen Bed",
   "promo":"false",
   "averageBaseRate":159.0,
   "averageRate":159.0,
   "currencyCode":"USD",
   "nightlyRateTotal":159.0,
   "surchargeTotal":26.81,
   "total":185.81
}


{  
   "customerSessionId":"0AB29024-F6D4-3915-0862-DB3FD1904C5A",
   "numberOfRoomsRequested":1,
   "moreResultsAvailable":"true",
   "cacheKey":"-705f6d43:15086db3fd1:-4c58",
   "cacheLocation":"10.178.144.36:7300",
   "size":2,
   "activePropertyCount":2,
   "hotelId":263664,
   "city":"Las Vegas",
   "highRate":135.0,
   "lowRate":94.5,
   "rateCurrencyCode":"USD",
   "roomTypeCode":373685,
   "rateCode":1238953,
   "roomDescription":"Standard Room, 1 King Bed",
   "promo":"true",
   "averageBaseRate":135.0,
   "averageRate":94.5,
   "currencyCode":"USD",
   "nightlyRateTotal":94.5,
   "surchargeTotal":9.45,
   "total":103.95
}

Я попытался сделать это с помощью функции flattenDict, но почему-то не получаю вывод в нужном формате. Ниже мой код:

def flattenDict(d, result=None):
    """Flatten a nested dict into a single-level dict with dotted keys.

    Nested dict keys are joined to their parent key with ``"."``
    (``{"a": {"b": 1}}`` becomes ``{"a.b": 1}``).

    Lists and tuples are handled as follows:

    * dict elements are flattened under the list's own key, *without* an
      index, so when several elements share a key the last element wins;
    * non-dict elements are skipped.

    Args:
        d: Mapping to flatten.
        result: Optional dict to accumulate into. It is mutated in place;
            a new dict is created when omitted.

    Returns:
        The ``result`` dict holding the flattened key/value pairs.
    """
    if result is None:
        result = {}
    for key, value in d.items():
        if isinstance(value, dict):
            children = [value]
        elif isinstance(value, (list, tuple)):
            children = [element for element in value if isinstance(element, dict)]
        else:
            result[key] = value
            continue
        # Recurse exactly once per child. Repeating the same call for every
        # key of the child yields the same result but makes the number of
        # calls grow exponentially with nesting depth.
        for child in children:
            prefixed = {".".join([key, sub_key]): sub_value
                        for sub_key, sub_value in child.items()}
            flattenDict(prefixed, result)
    return result
Другие вопросы по тегам