Выбор всех возможных пар атрибутов в RapidMiner

У меня есть список, в котором общее количество атрибутов вначале неизвестно.

Я хочу объединить (конкатенировать) все пары атрибутов, не зная заранее, сколько их.

Есть оператор Loop Attribute Subsets, но у него, к сожалению, нет выхода.

В настоящее время мой процесс выглядит так:

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process: builds a small example set with three nominal
  attributes, then concatenates each pair of attributes and appends the
  three results. The pairs are picked by three hand-written Select
  Attributes operators, so this layout only covers the three-attribute case.
-->
<process version="7.1.001">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="6.0.002" expanded="true" name="Process">
    <process expanded="true">
      <!-- Test data: five generators appended into one example set. -->
      <operator activated="true" class="subprocess" compatibility="7.1.001" expanded="true" height="82" name="Generate Data" width="90" x="45" y="75">
        <process expanded="true">
          <!-- Each generator defines one value for Group_1, Group_2 and Group_3. -->
          <operator activated="true" class="generate_data_user_specification" compatibility="6.4.000" expanded="true" height="60" name="Generate Data by User Specification" width="90" x="45" y="30">
            <list key="attribute_values">
              <parameter key="Group_1" value="&quot;A&quot;"/>
              <parameter key="Group_2" value="&quot;B&quot;"/>
              <parameter key="Group_3" value="&quot;C&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="generate_data_user_specification" compatibility="6.4.000" expanded="true" height="60" name="Generate Data by User Specification (2)" width="90" x="180" y="30">
            <list key="attribute_values">
              <parameter key="Group_1" value="&quot;B&quot;"/>
              <parameter key="Group_2" value="&quot;C&quot;"/>
              <parameter key="Group_3" value="&quot;D&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="generate_data_user_specification" compatibility="6.4.000" expanded="true" height="60" name="Generate Data by User Specification (3)" width="90" x="315" y="30">
            <list key="attribute_values">
              <parameter key="Group_1" value="&quot;D&quot;"/>
              <parameter key="Group_2" value="&quot;A&quot;"/>
              <parameter key="Group_3" value="&quot;B&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <!-- The value "M" used below is turned into a missing value by Declare Missing Value. -->
          <operator activated="true" class="generate_data_user_specification" compatibility="6.4.000" expanded="true" height="60" name="Generate Data by User Specification (4)" width="90" x="450" y="30">
            <list key="attribute_values">
              <parameter key="Group_1" value="&quot;A&quot;"/>
              <parameter key="Group_2" value="&quot;C&quot;"/>
              <parameter key="Group_3" value="&quot;M&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="generate_data_user_specification" compatibility="6.4.000" expanded="true" height="60" name="Generate Data by User Specification (5)" width="90" x="585" y="30">
            <list key="attribute_values">
              <parameter key="Group_1" value="&quot;C&quot;"/>
              <parameter key="Group_2" value="&quot;M&quot;"/>
              <parameter key="Group_3" value="&quot;M&quot;"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="append" compatibility="7.1.001" expanded="true" height="148" name="Append" width="90" x="720" y="30"/>
          <!-- Declares the nominal value M as missing. -->
          <operator activated="true" class="declare_missing_value" compatibility="6.4.000" expanded="true" height="76" name="Declare Missing Value" width="90" x="855" y="30">
            <parameter key="mode" value="nominal"/>
            <parameter key="nominal_value" value="M"/>
          </operator>
          <!-- Note: the first generator feeds Append port "example set 2" and the second feeds "example set 1". -->
          <connect from_op="Generate Data by User Specification" from_port="output" to_op="Append" to_port="example set 2"/>
          <connect from_op="Generate Data by User Specification (2)" from_port="output" to_op="Append" to_port="example set 1"/>
          <connect from_op="Generate Data by User Specification (3)" from_port="output" to_op="Append" to_port="example set 3"/>
          <connect from_op="Generate Data by User Specification (4)" from_port="output" to_op="Append" to_port="example set 4"/>
          <connect from_op="Generate Data by User Specification (5)" from_port="output" to_op="Append" to_port="example set 5"/>
          <connect from_op="Append" from_port="merged set" to_op="Declare Missing Value" to_port="example set input"/>
          <connect from_op="Declare Missing Value" from_port="example set output" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
      </operator>
      <!-- Stores the number of attributes in macro num_attr. No other operator in this process references that macro. -->
      <operator activated="true" class="extract_macro" compatibility="7.1.001" expanded="true" height="68" name="Extract Macro (2)" width="90" x="179" y="75">
        <parameter key="macro" value="num_attr"/>
        <parameter key="macro_type" value="number_of_attributes"/>
        <list key="additional_macros"/>
      </operator>
      <!-- Generic renaming; the Select Attributes operators below refer to att1, att2 and att3. -->
      <operator activated="true" class="rename_by_generic_names" compatibility="7.1.001" expanded="true" height="82" name="Rename by Generic Names (2)" width="90" x="313" y="75"/>
      <!-- Three copies of the data, one per hard-coded attribute pair. -->
      <operator activated="true" class="multiply" compatibility="7.1.001" expanded="true" height="124" name="Multiply (2)" width="90" x="179" y="300"/>
      <!-- Pair att1 / att2. NOTE(review): the subset lists in the three Select Attributes operators contain empty entries (leading or doubled "|"); presumably harmless, confirm. -->
      <operator activated="true" class="select_attributes" compatibility="7.1.001" expanded="true" height="82" name="Select Attributes (2)" width="90" x="380" y="210">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="|att1|att2"/>
      </operator>
      <!-- Each branch is renamed generically again; all three Generate Concatenation operators below use att1 and att2. -->
      <operator activated="true" class="rename_by_generic_names" compatibility="7.1.001" expanded="true" height="82" name="Rename by Generic Names (5)" width="90" x="514" y="210"/>
      <!-- Pair att1 / att3. -->
      <operator activated="true" class="select_attributes" compatibility="7.1.001" expanded="true" height="82" name="Select Attributes (3)" width="90" x="380" y="300">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="att1||att3"/>
      </operator>
      <!-- Pair att2 / att3. -->
      <operator activated="true" class="select_attributes" compatibility="7.1.001" expanded="true" height="82" name="Select Attributes (4)" width="90" x="380" y="390">
        <parameter key="attribute_filter_type" value="subset"/>
        <parameter key="attributes" value="att2||att3"/>
      </operator>
      <operator activated="true" class="rename_by_generic_names" compatibility="7.1.001" expanded="true" height="82" name="Rename by Generic Names (3)" width="90" x="514" y="390"/>
      <operator activated="true" class="rename_by_generic_names" compatibility="7.1.001" expanded="true" height="82" name="Rename by Generic Names (4)" width="90" x="514" y="300"/>
      <!-- Each branch is filtered with condition class no_missing_attributes before concatenation. -->
      <operator activated="true" class="filter_examples" compatibility="6.4.000" expanded="true" height="103" name="Filter Examples (2)" width="90" x="648" y="210">
        <parameter key="condition_class" value="no_missing_attributes"/>
        <list key="filters_list"/>
      </operator>
      <operator activated="true" class="filter_examples" compatibility="6.4.000" expanded="true" height="103" name="Filter Examples (3)" width="90" x="648" y="300">
        <parameter key="condition_class" value="no_missing_attributes"/>
        <list key="filters_list"/>
      </operator>
      <operator activated="true" class="filter_examples" compatibility="6.4.000" expanded="true" height="103" name="Filter Examples (4)" width="90" x="648" y="390">
        <parameter key="condition_class" value="no_missing_attributes"/>
        <list key="filters_list"/>
      </operator>
      <!-- Concatenates att1 and att2 of each branch. -->
      <operator activated="true" class="generate_concatenation" compatibility="7.1.001" expanded="true" height="82" name="Generate Concatenation (2)" width="90" x="782" y="390">
        <parameter key="first_attribute" value="att1"/>
        <parameter key="second_attribute" value="att2"/>
      </operator>
      <operator activated="true" class="generate_concatenation" compatibility="7.1.001" expanded="true" height="82" name="Generate Concatenation (3)" width="90" x="782" y="300">
        <parameter key="first_attribute" value="att1"/>
        <parameter key="second_attribute" value="att2"/>
      </operator>
      <operator activated="true" class="generate_concatenation" compatibility="7.1.001" expanded="true" height="82" name="Generate Concatenation (4)" width="90" x="782" y="210">
        <parameter key="first_attribute" value="att1"/>
        <parameter key="second_attribute" value="att2"/>
      </operator>
      <!-- Merges the three branches into the final result. -->
      <operator activated="true" class="append" compatibility="7.1.001" expanded="true" height="124" name="Append (3)" width="90" x="916" y="255"/>
      <connect from_op="Generate Data" from_port="out 1" to_op="Extract Macro (2)" to_port="example set"/>
      <connect from_op="Extract Macro (2)" from_port="example set" to_op="Rename by Generic Names (2)" to_port="example set input"/>
      <connect from_op="Rename by Generic Names (2)" from_port="example set output" to_op="Multiply (2)" to_port="input"/>
      <!-- The "original" port of the first rename is delivered as result 2. -->
      <connect from_op="Rename by Generic Names (2)" from_port="original" to_port="result 2"/>
      <connect from_op="Multiply (2)" from_port="output 1" to_op="Select Attributes (2)" to_port="example set input"/>
      <connect from_op="Multiply (2)" from_port="output 2" to_op="Select Attributes (3)" to_port="example set input"/>
      <connect from_op="Multiply (2)" from_port="output 3" to_op="Select Attributes (4)" to_port="example set input"/>
      <connect from_op="Select Attributes (2)" from_port="example set output" to_op="Rename by Generic Names (5)" to_port="example set input"/>
      <connect from_op="Rename by Generic Names (5)" from_port="example set output" to_op="Filter Examples (2)" to_port="example set input"/>
      <connect from_op="Select Attributes (3)" from_port="example set output" to_op="Rename by Generic Names (4)" to_port="example set input"/>
      <connect from_op="Select Attributes (4)" from_port="example set output" to_op="Rename by Generic Names (3)" to_port="example set input"/>
      <connect from_op="Rename by Generic Names (3)" from_port="example set output" to_op="Filter Examples (4)" to_port="example set input"/>
      <connect from_op="Rename by Generic Names (4)" from_port="example set output" to_op="Filter Examples (3)" to_port="example set input"/>
      <connect from_op="Filter Examples (2)" from_port="example set output" to_op="Generate Concatenation (4)" to_port="example set input"/>
      <connect from_op="Filter Examples (3)" from_port="example set output" to_op="Generate Concatenation (3)" to_port="example set input"/>
      <connect from_op="Filter Examples (4)" from_port="example set output" to_op="Generate Concatenation (2)" to_port="example set input"/>
      <connect from_op="Generate Concatenation (2)" from_port="example set output" to_op="Append (3)" to_port="example set 3"/>
      <connect from_op="Generate Concatenation (3)" from_port="example set output" to_op="Append (3)" to_port="example set 2"/>
      <connect from_op="Generate Concatenation (4)" from_port="example set output" to_op="Append (3)" to_port="example set 1"/>
      <connect from_op="Append (3)" from_port="merged set" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
    </process>
  </operator>
</process>

1 ответ

Это сложный вопрос. Оператор Loop Subsets не возвращает набор примеров, потому что он создаёт несколько различных наборов примеров, каждый из которых состоит из атрибутов, составленных из комбинаций входных атрибутов. Чтобы обойти это, для хранения промежуточных итогов можно использовать операторы Recall и Remember. Но и на этом история не заканчивается: обычно требуется один итоговый набор примеров, а значит, для переименования и соединения (Join) придётся проделать довольно сложную гимнастику.

Короче говоря, я приложил отдельный пример процесса, который иллюстрирует все это. Это не будет работать без адаптации к вашим данным.

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process: loops over all attribute pairs of the Iris sample data,
  concatenates each pair into one attribute named after the pair, and joins
  the per-iteration results into a running total stored under the name
  runningTotal.
-->
<process version="7.0.001">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="7.0.001" expanded="true" name="Process">
    <process expanded="true">
      <operator activated="true" class="retrieve" compatibility="7.0.001" expanded="true" height="68" name="Retrieve Iris" width="90" x="45" y="34">
        <parameter key="repository_entry" value="//Samples/data/Iris"/>
      </operator>
      <!-- One copy goes into the loop, the other is joined with the final result. -->
      <operator activated="true" class="multiply" compatibility="7.0.001" expanded="true" height="103" name="Multiply" width="90" x="45" y="136"/>
      <!-- Minimum, maximum and exact number of attributes are all 2, i.e. the loop is set up for pairs. -->
      <operator activated="true" class="loop_attribute_subsets" compatibility="7.0.001" expanded="true" height="68" name="Loop Subsets" width="90" x="179" y="34">
        <parameter key="exact_number_of_attributes" value="2"/>
        <parameter key="min_number_of_attributes" value="2"/>
        <parameter key="limit_max_number" value="true"/>
        <parameter key="max_number_of_attributes" value="2"/>
        <process expanded="true">
          <!-- Logs the feature_names value of Loop Subsets in a column called Attributes. -->
          <operator activated="true" class="log" compatibility="7.0.001" expanded="true" height="82" name="Log" width="90" x="112" y="34">
            <list key="log">
              <parameter key="Attributes" value="operator.Loop Subsets.value.feature_names"/>
            </list>
          </operator>
          <!-- Turns the log named Log into an example set. -->
          <operator activated="true" class="log_to_data" compatibility="7.0.001" expanded="true" height="103" name="Log to Data" width="90" x="112" y="238">
            <parameter key="log_name" value="Log"/>
          </operator>
          <!-- in 1 (the data) is passed straight to out 1; in 2 (the log data) is used to fill the macro "remember". -->
          <operator activated="true" class="subprocess" compatibility="7.0.001" expanded="true" height="103" name="Subprocess" width="90" x="246" y="238">
            <process expanded="true">
              <!-- Copies the Attributes value of example 1 into the macro "remember". -->
              <operator activated="true" class="extract_macro" compatibility="7.0.001" expanded="true" height="68" name="Extract Macro" width="90" x="179" y="136">
                <parameter key="macro" value="remember"/>
                <parameter key="macro_type" value="data_value"/>
                <parameter key="attribute_name" value="Attributes"/>
                <parameter key="example_index" value="1"/>
                <list key="additional_macros"/>
              </operator>
              <!-- Clears the log (delete_table is true), presumably so the next iteration starts with an empty log. -->
              <operator activated="true" class="clear_log" compatibility="7.0.001" expanded="true" height="82" name="Clear Log" width="90" x="380" y="136">
                <parameter key="log_name" value="Log"/>
                <parameter key="delete_table" value="true"/>
              </operator>
              <connect from_port="in 1" to_port="out 1"/>
              <connect from_port="in 2" to_op="Extract Macro" to_port="example set"/>
              <connect from_op="Extract Macro" from_port="example set" to_op="Clear Log" to_port="through 1"/>
              <connect from_op="Clear Log" from_port="through 1" to_port="out 2"/>
              <portSpacing port="source_in 1" spacing="0"/>
              <portSpacing port="source_in 2" spacing="0"/>
              <portSpacing port="source_in 3" spacing="0"/>
              <portSpacing port="sink_out 1" spacing="0"/>
              <portSpacing port="sink_out 2" spacing="0"/>
              <portSpacing port="sink_out 3" spacing="0"/>
            </process>
          </operator>
          <operator activated="true" class="materialize_data" compatibility="7.0.001" expanded="true" height="82" name="Materialize Data" width="90" x="246" y="34"/>
          <!-- Generic renaming; the following operators refer to att1 and att2. -->
          <operator activated="true" class="rename_by_generic_names" compatibility="7.0.001" expanded="true" height="82" name="Rename by Generic Names" width="90" x="380" y="34"/>
          <operator activated="true" class="generate_concatenation" compatibility="7.0.001" expanded="true" height="82" name="Generate Concatenation" width="90" x="380" y="136">
            <parameter key="first_attribute" value="att1"/>
            <parameter key="second_attribute" value="att2"/>
          </operator>
          <!-- Inverted subset selection: drops att1 and att2 and keeps the rest. -->
          <operator activated="true" class="select_attributes" compatibility="7.0.001" expanded="true" height="82" name="Select Attributes" width="90" x="380" y="238">
            <parameter key="attribute_filter_type" value="subset"/>
            <parameter key="attributes" value="att2|att1"/>
            <parameter key="invert_selection" value="true"/>
          </operator>
          <!-- Renames att1_att2 to the value of the macro "remember". -->
          <operator activated="true" class="rename" compatibility="7.0.001" expanded="true" height="82" name="Rename" width="90" x="514" y="34">
            <parameter key="old_name" value="att1_att2"/>
            <parameter key="new_name" value="%{remember}"/>
            <list key="rename_additional_attributes"/>
          </operator>
          <operator activated="true" class="handle_exception" compatibility="7.0.001" expanded="true" height="82" name="Handle Exception" width="90" x="514" y="136">
            <!-- First subprocess: recall runningTotal, join it with the current result, store the join again. -->
            <process expanded="true">
              <operator activated="true" class="recall" compatibility="7.0.001" expanded="true" height="68" name="Recall (2)" width="90" x="45" y="187">
                <parameter key="name" value="runningTotal"/>
                <parameter key="remove_from_store" value="false"/>
              </operator>
              <!-- NOTE(review): the key attribute "Play" is not referenced anywhere else in this process; confirm whether this key list is actually used for the Iris data. -->
              <operator activated="true" class="join" compatibility="7.0.001" expanded="true" height="82" name="Join" width="90" x="179" y="34">
                <list key="key_attributes">
                  <parameter key="Play" value="Play"/>
                </list>
              </operator>
              <operator activated="true" class="remember" compatibility="7.0.001" expanded="true" height="68" name="Remember" width="90" x="246" y="187">
                <parameter key="name" value="runningTotal"/>
              </operator>
              <connect from_port="in 1" to_op="Join" to_port="left"/>
              <connect from_op="Recall (2)" from_port="result" to_op="Join" to_port="right"/>
              <connect from_op="Join" from_port="join" to_op="Remember" to_port="store"/>
              <connect from_op="Remember" from_port="stored" to_port="out 1"/>
              <portSpacing port="source_in 1" spacing="0"/>
              <portSpacing port="source_in 2" spacing="0"/>
              <portSpacing port="sink_out 1" spacing="0"/>
              <portSpacing port="sink_out 2" spacing="0"/>
            </process>
            <!-- Second subprocess: only stores the current result as runningTotal. Presumably the fallback that runs when the first subprocess fails, e.g. when nothing has been stored yet. -->
            <process expanded="true">
              <operator activated="true" class="remember" compatibility="7.0.001" expanded="true" height="68" name="Remember (2)" width="90" x="179" y="34">
                <parameter key="name" value="runningTotal"/>
              </operator>
              <connect from_port="in 1" to_op="Remember (2)" to_port="store"/>
              <connect from_op="Remember (2)" from_port="stored" to_port="out 1"/>
              <portSpacing port="source_in 1" spacing="0"/>
              <portSpacing port="source_in 2" spacing="0"/>
              <portSpacing port="sink_out 1" spacing="0"/>
              <portSpacing port="sink_out 2" spacing="0"/>
            </process>
          </operator>
          <connect from_port="example set" to_op="Log" to_port="through 1"/>
          <connect from_op="Log" from_port="through 1" to_op="Log to Data" to_port="through 1"/>
          <connect from_op="Log to Data" from_port="exampleSet" to_op="Subprocess" to_port="in 2"/>
          <connect from_op="Log to Data" from_port="through 1" to_op="Subprocess" to_port="in 1"/>
          <connect from_op="Subprocess" from_port="out 1" to_op="Materialize Data" to_port="example set input"/>
          <connect from_op="Materialize Data" from_port="example set output" to_op="Rename by Generic Names" to_port="example set input"/>
          <connect from_op="Rename by Generic Names" from_port="example set output" to_op="Generate Concatenation" to_port="example set input"/>
          <connect from_op="Generate Concatenation" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
          <connect from_op="Select Attributes" from_port="example set output" to_op="Rename" to_port="example set input"/>
          <connect from_op="Rename" from_port="example set output" to_op="Handle Exception" to_port="in 1"/>
          <portSpacing port="source_example set" spacing="0"/>
        </process>
      </operator>
      <!-- Recalls runningTotal after the loop. Its in 1 port receives the Loop Subsets output but is not connected inside, presumably only to force this operator to run after the loop. -->
      <operator activated="true" class="subprocess" compatibility="7.0.001" expanded="true" height="82" name="Subprocess (2)" width="90" x="313" y="34">
        <process expanded="true">
          <operator activated="true" class="recall" compatibility="7.0.001" expanded="true" height="68" name="Recall" width="90" x="246" y="85">
            <parameter key="name" value="runningTotal"/>
          </operator>
          <connect from_op="Recall" from_port="result" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="source_in 2" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
      </operator>
      <!-- Joins the recalled running total (left) with the untouched copy of the input data (right). -->
      <operator activated="true" class="join" compatibility="7.0.001" expanded="true" height="82" name="Join (2)" width="90" x="581" y="136">
        <list key="key_attributes"/>
      </operator>
      <connect from_op="Retrieve Iris" from_port="output" to_op="Multiply" to_port="input"/>
      <connect from_op="Multiply" from_port="output 1" to_op="Loop Subsets" to_port="example set"/>
      <connect from_op="Multiply" from_port="output 2" to_op="Join (2)" to_port="right"/>
      <connect from_op="Loop Subsets" from_port="example set" to_op="Subprocess (2)" to_port="in 1"/>
      <connect from_op="Subprocess (2)" from_port="out 1" to_op="Join (2)" to_port="left"/>
      <connect from_op="Join (2)" from_port="join" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="90"/>
    </process>
  </operator>
</process>

Примечания к сведению

  • Оператор Loop Subsets настроен на выбор пар атрибутов.
  • Использование операторов Log и Log to Data внутри Loop Subsets позволяет записать текущую пару атрибутов в журнал, преобразовать журнал в набор примеров и затем скопировать значение в макрос.
  • Атрибуты переименовываются в обобщённые имена (att1, att2), объединяются, а затем результат переименовывается обратно в исходное имя.
  • Набор примеров с промежуточным итогом создаётся с помощью Join с результатом предыдущей итерации. На первой итерации предыдущего результата ещё нет, и этот случай обрабатывается оператором Handle Exception.
  • За пределами оператора Loop Subsets набор примеров с промежуточным итогом извлекается (Recall) внутри Subprocess, чтобы обеспечить правильный порядок выполнения.
  • Промежуточный итог соединяется с исходными данными, чтобы было легко увидеть, работает процесс или нет.

И последнее: оператор Materialize Data здесь необходим, хотя, по идее, он не должен требоваться.

Другие вопросы по тегам