1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
use index::boolean_index::boolean_query::{QueryAtom, BooleanOperator, PositionalOperator, FilterOperator, BooleanQuery};

/// This struct provides a flexible and ergonomic way to build `BooleanQuery` objects
///
/// # Examples
/// ## Simple Atom-Queries
/// Querying an Atom:
///
/// ```rust
/// use perlin::index::boolean_index::QueryBuilder;
/// //Would return all document ids of documents that contain '4'
/// let query = QueryBuilder::atom(4).build();
/// ```
/// ## Boolean Operators
/// Querying for two Atoms:
///
/// ```rust
/// use perlin::index::boolean_index::QueryBuilder;
/// //Would return all document ids of documents that contain '4' AND '8'
/// let and_query = QueryBuilder::and(QueryBuilder::atoms(vec![4, 8])).build();
///
/// //Would return all document ids of documents that contain '4' OR '8'
/// let or_query = QueryBuilder::or(QueryBuilder::atoms(vec![4, 8])).build();
/// ```
///
/// ## Positional Operators
/// Querying for phrases:
///
/// ```rust
/// use perlin::index::boolean_index::QueryBuilder;
///
/// //Would return all document ids of documents that contain '1 2 3'
/// let phrase_query = QueryBuilder::in_order(
///                vec![Some(1), Some(2), Some(3)]).build();
///
/// //Would match any phrase in following form "this is a * house"
/// //Where '*' can be any term
/// //i.e. "this is a blue house" or "this is a small house"
/// //It would not match "this is a house" though.
/// let placeholder_query = QueryBuilder::in_order(
///     vec![Some("this"),Some("is"), Some("a"), None, Some("house")]).build();
/// ```
/// ## Query Filters
///
/// ```rust
/// use perlin::index::boolean_index::QueryBuilder;
///
/// //Would return all document ids of documents that contain '4' but NOT '8'
/// let filtered_query =
/// QueryBuilder::atom(4).not(QueryBuilder::atom(8)).build();
/// ```
///
/// ## Nested Queries
/// Query objects can be nested in arbitrary depth:
///
/// ```rust
/// use perlin::index::boolean_index::QueryBuilder;
///
/// let nested_query = QueryBuilder::and(vec![
///                         QueryBuilder::or(QueryBuilder::atoms(vec![0, 4])),
///                         QueryBuilder::in_order(
///                                    vec![Some(9),
///                                         Some(7)])
///                                    .not(QueryBuilder::atom(3))]).build();
/// ```


pub struct QueryBuilder<TTerm> {
    // The query assembled so far. Each constructor/combinator in the impl
    // below produces a new builder around a new `BooleanQuery`, and `build`
    // hands this value out unchanged.
    query: BooleanQuery<TTerm>,
}

impl<TTerm> QueryBuilder<TTerm> {
    /// Connects `operands` with the AND operator.
    ///
    /// All operands have to occur in a document for it to match.
    /// Every operand is finalized with `build` and stored, in the order given,
    /// as a child of a `BooleanQuery::NAry` node.
    pub fn and(operands: Vec<QueryBuilder<TTerm>>) -> Self {
        let children: Vec<_> = operands.into_iter().map(|operand| operand.build()).collect();
        QueryBuilder { query: BooleanQuery::NAry(BooleanOperator::And, children) }
    }

    /// Connects `operands` with the OR operator.
    ///
    /// Any operand can occur in a document for it to match.
    /// Every operand is finalized with `build` and stored, in the order given,
    /// as a child of a `BooleanQuery::NAry` node.
    pub fn or(operands: Vec<QueryBuilder<TTerm>>) -> Self {
        let children: Vec<_> = operands.into_iter().map(|operand| operand.build()).collect();
        QueryBuilder { query: BooleanQuery::NAry(BooleanOperator::Or, children) }
    }

    /// Turns a vector of terms into a vector of `QueryBuilder` objects,
    /// one atom query per term, preserving the input order.
    ///
    /// Useful utility function to be used with other methods in this struct,
    /// e.g. `QueryBuilder::and(QueryBuilder::atoms(terms))`.
    /// See the `QueryBuilder` type documentation for examples.
    pub fn atoms(terms: Vec<TTerm>) -> Vec<Self> {
        terms.into_iter().map(QueryBuilder::atom).collect()
    }

    /// Most simple query for just a term.
    ///
    /// Documents containing the term match, all others do not.
    pub fn atom(term: TTerm) -> Self {
        // The first argument of `QueryAtom::new` is always 0 for a lone atom;
        // `in_order` passes the operand index there instead.
        // NOTE(review): presumably this is the atom's relative position - confirm
        // against `QueryAtom`.
        QueryBuilder { query: BooleanQuery::Atom(QueryAtom::new(0, term)) }
    }

    /// Use this method to build phrase queries.
    ///
    /// Operands must occur in the document in the same order as passed to this method.
    /// Operands are wrapped inside `Option` to allow for placeholders in phrase queries:
    ///
    /// * `Some(A)`: `A` has to occur at that position.
    /// * `None`: any term can occur at that position.
    ///
    /// Placeholders produce no atom of their own, but they still advance the
    /// index that is handed to `QueryAtom::new` for the following operands.
    /// An input consisting only of `None`s (or an empty input) therefore yields
    /// a positional query without atoms.
    /// See the `QueryBuilder` type documentation for examples.
    pub fn in_order(operands: Vec<Option<TTerm>>) -> Self {
        let atoms: Vec<_> = operands.into_iter()
            .enumerate()
            // Enumerate before dropping the `None`s so that the surviving atoms
            // keep the index they had in the original phrase.
            .filter_map(|(index, operand)| operand.map(|term| QueryAtom::new(index, term)))
            .collect();
        QueryBuilder { query: BooleanQuery::Positional(PositionalOperator::InOrder, atoms) }
    }

    /// Applies the NOT operator between two queries.
    ///
    /// E.g. returns all document ids that match `self` and NOT `filter`.
    /// Both builders are consumed; `self` becomes the first and `filter` the
    /// second boxed operand of a `BooleanQuery::Filter` node.
    /// See the `QueryBuilder` type documentation for an example.
    pub fn not(self, filter: Self) -> Self {
        let matching = Box::new(self.build());
        let excluded = Box::new(filter.build());
        QueryBuilder { query: BooleanQuery::Filter(FilterOperator::Not, matching, excluded) }
    }

    /// Final method to be called in the query building process.
    ///
    /// Consumes the builder and returns the actual `BooleanQuery` object
    /// to be passed to the index.
    pub fn build(self) -> BooleanQuery<TTerm> {
        self.query
    }
}


#[cfg(test)]
mod tests {
    use super::QueryBuilder;

    use index::Index;
    use index::boolean_index::tests::prepare_index;

    // All tests run against the fixture index returned by `prepare_index`;
    // the expected document ids below are tied to that fixture's contents.

    /// AND over two atoms: expects documents 0 and 2 from the fixture index.
    #[test]
    fn and_query() {
        let index = prepare_index();
        let both_terms = QueryBuilder::atoms(vec![0, 5]);
        let query = QueryBuilder::and(both_terms).build();
        let hits: Vec<_> = index.execute_query(&query).collect();
        assert_eq!(hits, vec![0, 2]);
    }

    /// An OR query nested inside an AND query: expects only document 0.
    #[test]
    fn nested_and() {
        let index = prepare_index();
        let either_term = QueryBuilder::or(QueryBuilder::atoms(vec![12, 7]));
        let required_term = QueryBuilder::atom(9);
        let query = QueryBuilder::and(vec![either_term, required_term]).build();
        let hits: Vec<_> = index.execute_query(&query).collect();
        assert_eq!(hits, vec![0]);
    }

    /// The same AND query as in `and_query`, filtered with NOT 9:
    /// expects only document 2 to remain.
    #[test]
    fn not() {
        let index = prepare_index();
        let base = QueryBuilder::and(QueryBuilder::atoms(vec![0, 5]));
        let excluded = QueryBuilder::atom(9);
        let query = base.not(excluded).build();
        let hits: Vec<_> = index.execute_query(&query).collect();
        assert_eq!(hits, vec![2]);
    }

    /// Phrase queries with and without a placeholder between the two terms.
    #[test]
    fn in_order() {
        let index = prepare_index();

        // "0 <any term> 2" is expected to match document 0.
        let with_gap = QueryBuilder::in_order(vec![Some(0), None, Some(2)]).build();
        let gap_hits: Vec<_> = index.execute_query(&with_gap).collect();
        assert_eq!(gap_hits, vec![0]);

        // "0 2" directly adjacent is expected to match document 1.
        let adjacent = QueryBuilder::in_order(vec![Some(0), Some(2)]).build();
        let adjacent_hits: Vec<_> = index.execute_query(&adjacent).collect();
        assert_eq!(adjacent_hits, vec![1]);
    }
}