Поддержка CRUD для списка Dicts

Моя цель - добавить поддержку Weaviate в проект pyLodStorage. В частности, я хотел бы использовать образцы данных из:

https://github.com/WolfgangFahl/pyLoDStorage/blob/master/lodstorage/sample.py

Который имеет

  • несколько записей лиц из королевской семьи
  • список городов из нескольких тысяч записей
  • искусственный список записей с любым количеством записей

в качестве примеров.

Все данные в виде таблиц. Некоторые основные типы Python, такие как:

  • ул
  • bool
  • int
  • плавать
  • свидание
  • дата и время

нуждаются в поддержке.

Я создал проект http://wiki.bitplan.com/index.php/DgraphAndWeaviateTest и скрипт для запуска Weaviate через docker compose. Существует модульный тест Python, который использовался для работы с клиентом Weaviate Python 0.4.1.

Я пытаюсь использовать информацию из https://www.semi.technology/documentation/weaviate/current/how-tos/how-to-create-a-schema.html для рефакторинга этого модульного теста, но не знаю, как сделать это.

Что нужно сделать, чтобы тесты CRUD работали, как, например, в трех других тестах:https://github.com/WolfgangFahl/pyLoDStorage/tree/master/tests для

  • JSON
  • SPARQL
  • SQL

Меня особенно интересует "двусторонняя" обработка списка dicts (также известного как "Таблица") со стандартными типами данных, указанными выше. Итак, я хотел бы создать список dicts, а затем:

  • получить схему автоматически, просмотрев некоторые образцы записей
  • проверьте, существует ли уже схема и удалите ли ее
  • создать схему
  • проверьте, если данные уже выходят, и если удалите их
  • добавить данные и сохранить их
  • необязательно сохранить схему для дальнейшего использования
  • восстановить данные с использованием или без использования информации схемы

убедитесь, что восстановленные данные (список Dicts) совпадают с исходными данными

    Created on 2020-07-24
    
    @author: wf
    '''
    import unittest
    import weaviate
    import time
    #import getpass
    
    class TestWeaviate(unittest.TestCase):
    # https://www.semi.technology/documentation/weaviate/current/client-libs/python.html
    
        def setUp(self):
            self.port=8153
            self.host="localhost"
            #if getpass.getuser()=="wf":
            #    self.host="zeus"
            #    self.port=8080
            pass
        
        def getClient(self):
            self.client=weaviate.Client("http://%s:%d" % (self.host,self.port))
            return self.client
    
        def tearDown(self):
            pass
            
        def testRunning(self):
            '''
            make sure weaviate is running
            '''
            w=self.getClient()
            self.assertTrue(w.is_live())
            self.assertTrue(w.is_ready())
                
    
        def testWeaviateSchema(self):
            ''' see https://www.semi.technology/documentation/weaviate/current/client-libs/python.html '''
            w = self.getClient()
            #contains_schema = w.schema.contains()
            try:
                w.create_schema("https://raw.githubusercontent.com/semi-technologies/weaviate-python-client/master/documentation/getting_started/people_schema.json")
            except:
                pass
            entries=[
               [ {"name": "John von Neumann"}, "Person", "b36268d4-a6b5-5274-985f-45f13ce0c642"],
               [ {"name": "Alan Turing"}, "Person", "1c9cd584-88fe-5010-83d0-017cb3fcb446"],
               [ {"name": "Legends"}, "Group", "2db436b5-0557-5016-9c5f-531412adf9c6" ]
            ]
            for entry in entries:
                dict,type,uid=entry
                try:
                    w.create(dict,type,uid)
                except weaviate.exceptions.ThingAlreadyExistsException as taee:
                    print ("%s already created" % dict['name'])
                
            pass
        
        def testPersons(self):
            return
            w = self.getClient()
    
            schema = {
            "actions": {"classes": [],"type": "action"},
            "things": {"classes": [{
                "class": "Person",
                "description": "A person such as humans or personality known through culture",
                "properties": [
                    {
                        "cardinality": "atMostOne",
                        "dataType": ["text"],
                        "description": "The name of this person",
                        "name": "name"
                    }
                ]}],
                "type": "thing"
            }
            }
            w.create_schema(schema)
            
            w.create_thing({"name": "Andrew S. Tanenbaum"}, "Person")
            w.create_thing({"name": "Alan Turing"}, "Person")
            w.create_thing({"name": "John von Neumann"}, "Person")
            w.create_thing({"name": "Tim Berners-Lee"}, "Person")
            
        def testEventSchema(self):    
            '''
            https://stackru.com/a/63077495/1497139
            '''
            return
            schema = {
              "things": {
                "type": "thing",
                "classes": [
                  {
                    "class": "Event",
                    "description": "event",
                    "properties": [
                      {
                        "name": "acronym",
                        "description": "acronym",
                        "dataType": [
                          "text"
                        ]
                      },
                      {
                        "name": "inCity",
                        "description": "city reference",
                        "dataType": [
                          "City"
                        ],
                        "cardinality": "many"
                      }
                    ]
                  },
                  {
                    "class": "City",
                    "description": "city",
                    "properties": [
                      {
                        "name": "name",
                        "description": "name",
                        "dataType": [
                          "text"
                        ]
                      },
                      {
                        "name": "hasEvent",
                        "description": "event references",
                        "dataType": [
                          "Event"
                        ],
                        "cardinality": "many"
                      }
                    ]
                  }
                ]
              }
            }
    
    
            client = self.getClient()
    
            if not client.contains_schema():
                client.create_schema(schema)
    
            event = {"acronym": "example"}
            client.create(event, "Event", "2a8d56b7-2dd5-4e68-aa40-53c9196aecde")
            city = {"name": "Amsterdam"}
            client.create(city, "City", "c60505f9-8271-4eec-b998-81d016648d85")
    
            time.sleep(2.0)
            client.add_reference("c60505f9-8271-4eec-b998-81d016648d85", "hasEvent", "2a8d56b7-2dd5-4e68-aa40-53c9196aecde")
    
    
    if __name__ == "__main__":
        #import sys;sys.argv = ['', 'Test.testName']
        unittest.main()

2 ответа

Модульный тест для объектов соединения, схемы и данных, которые вы показываете выше, работает с клиентом Python v1.x следующим образом (см. Встроенные комментарии, чтобы узнать, что изменилось):

import unittest
import weaviate
import time
#import getpass

class TestWeaviate(unittest.TestCase):
# https://www.semi.technology/documentation/weaviate/current/client-libs/python.html

    def setUp(self):
        self.port=8153
        self.host="localhost"
        #if getpass.getuser()=="wf":
        #    self.host="zeus"
        #    self.port=8080
        pass
    
    def getClient(self):
        self.client=weaviate.Client("http://%s:%d" % (self.host,self.port))
        return self.client

    def tearDown(self):
        pass
        
    def testRunning(self):
        '''
        make sure weaviate is running
        '''
        w=self.getClient()
        self.assertTrue(w.is_live())
        self.assertTrue(w.is_ready())
            

    def testWeaviateSchema(self):
        ''' see https://www.semi.technology/documentation/weaviate/current/client-libs/python.html '''
        w = self.getClient()
        #contains_schema = w.schema.contains()
        try:
            w.schema.create("https://raw.githubusercontent.com/semi-technologies/weaviate-python-client/master/documentation/getting_started/people_schema.json") # instead of w.create_schema, see https://www.semi.technology/documentation/weaviate/current/how-tos/how-to-create-a-schema.html#creating-your-first-schema-with-the-python-client
        except:
            pass
        entries=[
            [ {"name": "John von Neumann"}, "Person", "b36268d4-a6b5-5274-985f-45f13ce0c642"],
            [ {"name": "Alan Turing"}, "Person", "1c9cd584-88fe-5010-83d0-017cb3fcb446"],
            [ {"name": "Legends"}, "Group", "2db436b5-0557-5016-9c5f-531412adf9c6" ]
        ]
        for entry in entries:
            dict,type,uid=entry
            try:
                w.data_object.create(dict,type,uid) # instead of w.create(dict,type,uid), see https://www.semi.technology/documentation/weaviate/current/restful-api-references/semantic-kind.html#example-request-1
            except weaviate.exceptions.ThingAlreadyExistsException as taee:
                print ("%s already created" % dict['name'])
            
        pass
    
    def testPersons(self):
        return
        w = self.getClient()

        schema = {
        "actions": {"classes": [],"type": "action"},
        "things": {"classes": [{
            "class": "Person",
            "description": "A person such as humans or personality known through culture",
            "properties": [
                {
                    "cardinality": "atMostOne",
                    "dataType": ["text"],
                    "description": "The name of this person",
                    "name": "name"
                }
            ]}],
            "type": "thing"
        }
        }
        w.schema.create(schema) # instead of w.create_schema(schema)
        
        w.data_object.create({"name": "Andrew S. Tanenbaum"}, "Person") # instead of  w.create_thing({"name": "Andrew S. Tanenbaum"}, "Person")
        w.data_object.create({"name": "Alan Turing"}, "Person")
        w.data_object.create({"name": "John von Neumann"}, "Person")
        w.data_object.create({"name": "Tim Berners-Lee"}, "Person")
        
    def testEventSchema(self):    
        '''
        https://stackru.com/a/63077495/1497139
        '''
        return
        schema = {
            "things": {
            "type": "thing",
            "classes": [
                {
                "class": "Event",
                "description": "event",
                "properties": [
                    {
                    "name": "acronym",
                    "description": "acronym",
                    "dataType": [
                        "text"
                    ]
                    },
                    {
                    "name": "inCity",
                    "description": "city reference",
                    "dataType": [
                        "City"
                    ],
                    "cardinality": "many"
                    }
                ]
                },
                {
                "class": "City",
                "description": "city",
                "properties": [
                    {
                    "name": "name",
                    "description": "name",
                    "dataType": [
                        "text"
                    ]
                    },
                    {
                    "name": "hasEvent",
                    "description": "event references",
                    "dataType": [
                        "Event"
                    ],
                    "cardinality": "many"
                    }
                ]
                }
            ]
            }
        }


        client = self.getClient()

        if not client.contains_schema():
            client.schema.create(schema) # instead of client.create_schema(schema)

        event = {"acronym": "example"}
        client.data_object.create(event, "Event", "2a8d56b7-2dd5-4e68-aa40-53c9196aecde") # instead of client.create(event, "Event", "2a8d56b7-2dd5-4e68-aa40-53c9196aecde")
        city = {"name": "Amsterdam"}
        client.data_object.create(city, "City", "c60505f9-8271-4eec-b998-81d016648d85")

        time.sleep(2.0)
        client.data_object.reference.add("c60505f9-8271-4eec-b998-81d016648d85", "hasEvent", "2a8d56b7-2dd5-4e68-aa40-53c9196aecde") # instead of client.add_reference("c60505f9-8271-4eec-b998-81d016648d85", "hasEvent", "2a8d56b7-2dd5-4e68-aa40-53c9196aecde"), see https://www.semi.technology/documentation/weaviate/current/restful-api-references/semantic-kind.html#add-a-cross-reference


if __name__ == "__main__":
    #import sys;sys.argv = ['', 'Test.testName']
    unittest.main()

Пока нет поддержки автоматического получения схемы из списка dict (или других форматов). Как вы упомянули, это может быть удобной функцией, поэтому мы добавляем ее к предложениям Weaviate!

Теперь доступна новая версия Weaviate (v1.2.1 — последняя версия на момент написания этой статьи). В этой версии много чего убрали и еще больше добавили. Одним из основных переломных изменений является то, что actionsа также thingsбыли удалены, objectsбыли введены вместо них. Все изменения и функции для weaviate v1.2 можно использовать с библиотекой python v2.3.

Большая часть текущей функциональности объясняется и показывается, как она работает в этой статье .

Вот те же юнит-тесты, но для Weaviate v1.2.1 и написанные с использованием weaviate-clientv2.3.1:

      import unittest
import weaviate
import time
#import getpass

person_schema = {
    "classes": [
    {
    "class": "Person",
    "description": "A person such as humans or personality known through culture",
    "properties": [
        {
        "name": "name",
        "description": "The name of this person",
        "dataType": ["text"]
        }
    ]
    },
    {
    "class": "Group",
    "description": "A set of persons who are associated with each other over some common properties",
    "properties": [
        {
        "name": "name",
        "description": "The name under which this group is known",
        "dataType": ["text"]
        },
        {
        "name": "members",
        "description": "The persons that are part of this group",
        "dataType": ["Person"]
        }
    ]
    }
]
}



class TestWeaviate(unittest.TestCase):
    # NEW link to the page
    # https://www.semi.technology/developers/weaviate/current/client-libraries/python.html

    def setUp(self):
        self.port=8080
        self.host="localhost"
        #if getpass.getuser()=="wf":
        #    self.host="zeus"
        #    self.port=8080
        pass
    
    def getClient(self):
        self.client=weaviate.Client("http://%s:%d" % (self.host,self.port))
        return self.client

    def tearDown(self):
        pass
        
    def testRunning(self):
        '''
        make sure weaviate is running
        '''
        w=self.getClient()
        self.assertTrue(w.is_live())
        self.assertTrue(w.is_ready())
            

    def testWeaviateSchema(self):
        # NEW link to the page
        # https://www.semi.technology/developers/weaviate/current/client-libraries/python.html
        w = self.getClient()
        #contains_schema = w.schema.contains()

        # it is a good idea to check if Weaviate has a schema already when testing, otherwise it will result in an error
        # this way you know for sure that your current schema is known to weaviate.

        if w.schema.contains():
            # delete the existing schema, (removes all the data objects too)
            w.schema.delete_all()
        # instead of w.create_schema(person_schema)
        w.schema.create(person_schema)
        entries=[
            [ {"name": "John von Neumann"}, "Person", "b36268d4-a6b5-5274-985f-45f13ce0c642"],
            [ {"name": "Alan Turing"}, "Person", "1c9cd584-88fe-5010-83d0-017cb3fcb446"],
            [ {"name": "Legends"}, "Group", "2db436b5-0557-5016-9c5f-531412adf9c6" ]
        ]
        for entry in entries:
            dict,type,uid=entry
            try:
                # instead of w.create(dict,type,uid), see https://www.semi.technology/developers/weaviate/current/restful-api-references/objects.html#create-a-data-object
                w.data_object.create(dict,type,uid)
            # ObjectAlreadyExistsException is the correct exception starting weaviate-client 2.0.0
            except weaviate.exceptions.ObjectAlreadyExistsException as taee: 
                print ("%s already created" % dict['name'])
            
        pass
    
    def testPersons(self):
        return
        w = self.getClient()

        schema = {
        #"actions": {"classes": [],"type": "action"}, `actions` and `things` were removed in weaviate v1.0 and removed in weaviate-client v2.0
        # Now there is only `objects`
        "classes": [
            {
            "class": "Person",
            "description": "A person such as humans or personality known through culture",
            "properties": [
                {
                    #"cardinality": "atMostOne", were removed in weaviate v1.0 and weaviate-client v2.0
                    "dataType": ["text"],
                    "description": "The name of this person",
                    "name": "name"
                }
            ]
            }
            ]
        }
        # instead of w.create_schema(schema)
        w.schema.create(schema) 
        
        # instead of  w.create_thing({"name": "Andrew S. Tanenbaum"}, "Person")
        w.data_object.create({"name": "Andrew S. Tanenbaum"}, "Person")
        w.data_object.create({"name": "Alan Turing"}, "Person")
        w.data_object.create({"name": "John von Neumann"}, "Person")
        w.data_object.create({"name": "Tim Berners-Lee"}, "Person")
        
    def testEventSchema(self):    
        '''
        https://stackoverflow.com/a/63077495/1497139
        '''
        return
        schema = {
            # "things": { , were removed in weaviate v1.0 and weaviate-client v2.0
            # "type": "thing", was removed in weaviate v1.0 and weaviate-client v2.0
            "classes": [
                {
                "class": "Event",
                "description": "event",
                "properties": [
                    {
                    "name": "acronym",
                    "description": "acronym",
                    "dataType": [
                        "text"
                    ]
                    },
                    {
                    "name": "inCity",
                    "description": "city reference",
                    "dataType": [
                        "City"
                    ],
                    # "cardinality": "many", were removed in weaviate v1.0 and weaviate-client v2.0
                    }
                ]
                },
                {
                "class": "City",
                "description": "city",
                "properties": [
                    {
                    "name": "name",
                    "description": "name",
                    "dataType": [
                        "text"
                    ]
                    },
                    {
                    "name": "hasEvent",
                    "description": "event references",
                    "dataType": [
                        "Event"
                    ],
                    # "cardinality": "many", were removed in weaviate v1.0 and weaviate-client v2.0
                    }
                ]
                }
            ]
        }


        client = self.getClient()

        # this test is going to fail if you are using the same Weaviate instance
        # We already created a schema in the test above so the new schme is not going to be created
        # and will result in an error.
        # we can delete the schema and create a new one.
        
        # instead of client.contains_schema()
        if client.schema.contains():
            # delete the existing schema, (removes all the data objects too)
            client.schema.delete_all()
        # instead of client.create_schema(schema)
        client.schema.create(schema)

        event = {"acronym": "example"}
        # instead of client.create(...)
        client.data_object.create(event, "Event", "2a8d56b7-2dd5-4e68-aa40-53c9196aecde")
        city = {"name": "Amsterdam"}
        client.data_object.create(city, "City", "c60505f9-8271-4eec-b998-81d016648d85")

        time.sleep(2.0)
        # instead of client.add_reference(...), see https://www.semi.technology/developers/weaviate/current/restful-api-references/objects.html#cross-references
        client.data_object.reference.add("c60505f9-8271-4eec-b998-81d016648d85", "hasEvent", "2a8d56b7-2dd5-4e68-aa40-53c9196aecde")


if __name__ == "__main__":
    #import sys;sys.argv = ['', 'Test.testName']
    unittest.main()
Другие вопросы по тегам