In [2]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

In [3]:
# Sample data
dataset = [['Milk', 'Bread', 'Eggs'],
           ['Milk', 'Bread'],
           ['Eggs', 'Bread', 'Butter'],
           ['Milk', 'Bread', 'Butter'],
           ['Milk', 'Butter']]

In [4]:
# Convert data into correct format
te = TransactionEncoder()
te_ary = te.fit(dataset).transform(dataset)
df = pd.DataFrame(te_ary, columns=te.columns_)

In [6]:
# Find frequent itemsets
frequent_itemsets = apriori(df, min_support=0.4, use_colnames=True)

# Generate association rules
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.6, num_itemsets=2)

In [7]:
# Print results
print("Frequent Itemsets:\n", frequent_itemsets)
print("\nAssociation Rules:\n", rules)

Frequent Itemsets:
    support         itemsets
0      0.8          (Bread)
1      0.6         (Butter)
2      0.4           (Eggs)
3      0.8           (Milk)
4      0.4  (Bread, Butter)
5      0.4    (Bread, Eggs)
6      0.6    (Bread, Milk)
7      0.4   (Milk, Butter)

Association Rules:
   antecedents consequents  antecedent support  consequent support  support  \
0    (Butter)     (Bread)                 0.6                 0.8      0.4   
1      (Eggs)     (Bread)                 0.4                 0.8      0.4   
2     (Bread)      (Milk)                 0.8                 0.8      0.6   
3      (Milk)     (Bread)                 0.8                 0.8      0.6   
4    (Butter)      (Milk)                 0.6                 0.8      0.4   

   confidence      lift  representativity  leverage  conviction  \
0    0.666667  0.833333               1.0     -0.08         0.6   
1    1.000000  1.250000               1.0      0.08         inf   
2    0.750000  0.937500              

In [8]:
# Save and load
frequent_itemsets.to_csv('frequent_itemsets.csv', index=False)
rules.to_csv('association_rules.csv', index=False)

loaded_frequent_itemsets = pd.read_csv('frequent_itemsets.csv')
loaded_rules = pd.read_csv('association_rules.csv')

print("\nLoaded Frequent Itemsets:\n", loaded_frequent_itemsets)
print("\nLoaded Association Rules:\n", loaded_rules)


Loaded Frequent Itemsets:
    support                        itemsets
0      0.8            frozenset({'Bread'})
1      0.6           frozenset({'Butter'})
2      0.4             frozenset({'Eggs'})
3      0.8             frozenset({'Milk'})
4      0.4  frozenset({'Bread', 'Butter'})
5      0.4    frozenset({'Bread', 'Eggs'})
6      0.6    frozenset({'Bread', 'Milk'})
7      0.4   frozenset({'Milk', 'Butter'})

Loaded Association Rules:
              antecedents           consequents  antecedent support  \
0  frozenset({'Butter'})  frozenset({'Bread'})                 0.6   
1    frozenset({'Eggs'})  frozenset({'Bread'})                 0.4   
2   frozenset({'Bread'})   frozenset({'Milk'})                 0.8   
3    frozenset({'Milk'})  frozenset({'Bread'})                 0.8   
4  frozenset({'Butter'})   frozenset({'Milk'})                 0.6   

   consequent support  support  confidence      lift  representativity  \
0                 0.8      0.4    0.666667  0.833333          

## Hyperparameter tuning

In [9]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

# Sample data
dataset = [['Milk', 'Bread', 'Eggs'],
           ['Milk', 'Bread'],
           ['Eggs', 'Bread', 'Butter'],
           ['Milk', 'Bread', 'Butter'],
           ['Milk', 'Butter']]

# Convert data into correct format
te = TransactionEncoder()
te_ary = te.fit(dataset).transform(dataset)
df = pd.DataFrame(te_ary, columns=te.columns_)

# Explore effect of min_support
min_supports = [0.2, 0.4, 0.6]

for min_support in min_supports:
  # Find frequent itemsets
  frequent_itemsets = apriori(df, min_support=min_support, use_colnames=True)
  print(f"Frequent Itemsets with min_support = {min_support}:\n", frequent_itemsets)

Frequent Itemsets with min_support = 0.2:
     support               itemsets
0       0.8                (Bread)
1       0.6               (Butter)
2       0.4                 (Eggs)
3       0.8                 (Milk)
4       0.4        (Bread, Butter)
5       0.4          (Bread, Eggs)
6       0.6          (Bread, Milk)
7       0.2         (Butter, Eggs)
8       0.4         (Milk, Butter)
9       0.2           (Milk, Eggs)
10      0.2  (Bread, Butter, Eggs)
11      0.2  (Bread, Milk, Butter)
12      0.2    (Bread, Milk, Eggs)
Frequent Itemsets with min_support = 0.4:
    support         itemsets
0      0.8          (Bread)
1      0.6         (Butter)
2      0.4           (Eggs)
3      0.8           (Milk)
4      0.4  (Bread, Butter)
5      0.4    (Bread, Eggs)
6      0.6    (Bread, Milk)
7      0.4   (Milk, Butter)
Frequent Itemsets with min_support = 0.6:
    support       itemsets
0      0.8        (Bread)
1      0.6       (Butter)
2      0.8         (Milk)
3      0.6  (Bread, Milk)