# Source: root/Explorator/trunk/lib/query_builder.rb @ 479
#
# Revision 479, 16.0 KB (checked in by samuraraujo, 8 years ago)
#
require "date"
# SemanticExpression is a DSL designed for the Explorator.
# This class implements the operation meta-model defined in the specification.
# The following operations are implemented:
#   union        - operation over sets
#   intersection - operation over sets
#   difference   - operation over sets
#   spo          - semantic operation defined in the Explorator meta-model
#   keyword      - search by keyword
# Author: Samur Araujo
# Date: 25 jun 2008.
require 'active_rdf'
require 'rjb'
class SemanticExpression
  # Accumulated result of the expression: an array of [subject, predicate,
  # object] triples, as built by #query and combined by the set operations.
  attr_accessor :result

  # Builds an empty expression; when at least one of s, p, o is given the
  # expression is seeded with union(s, p, o, r).
  def initialize(s = nil, p = nil, o = nil, r = nil)
    @result = []
    union(s, p, o, r) unless s.nil? && p.nil? && o.nil?
  end

  # Always returns an array, whatever was passed:
  #   nil                 -> []
  #   id of a ResourceSet -> the resources of that set (looked up through
  #                          Thread.current[:application])
  #   anything else       -> [s]
  def resource_or_set_to_array(s)
    return [] if s == nil
    application = Thread.current[:application]
    return application.get(s).resources if application.is_set?(s)
    [s]
  end

  # Normalises one argument of #spo / #remove to an array of terms.
  #   nil                -> []
  #   SemanticExpression -> position r of each of its triples (nil-free, unique)
  #   Array              -> the array itself
  #   anything else      -> [s]
  def resource_or_self(s, r)
    if s == nil
      []
    elsif s.instance_of?(SemanticExpression)
      project_triples(s.result, r).compact.uniq
    elsif s.instance_of?(Array)
      s
    else
      [s]
    end
  end

  # Executes a semantic operation. Each of s, p and o may be a single term,
  # an array of terms or a SemanticExpression; #query is run for every
  # combination and the triples are unioned into the current result.
  # r is the variable of the triple that should be gathered (see #query).
  # Returns self so calls can be chained.
  def spo(s, p, o, r = nil)
    subjects = resource_or_self(s, r).uniq
    predicates = resource_or_self(p, r).uniq
    objects = resource_or_self(o, r).uniq
    found = []
    subjects.each do |subject|
      predicates.each do |predicate|
        objects.each do |object|
          found |= query(subject, predicate, object, r)
        end
      end
    end
    @result = @result | found
    self
  end

  # Deletes every (s, p, o) combination from the 'USERDATA(Local)' adapter.
  # Arguments are normalised like in #spo; a Symbol in any position is
  # passed to the repository as nil. Resets the adapter cache and empties
  # the current result. Returns self.
  def remove(s, p, o, r = nil)
    subjects = resource_or_self(s, r).uniq
    predicates = resource_or_self(p, r).uniq
    objects = resource_or_self(o, r).uniq
    adapter = ConnectionPool.get_adapter('USERDATA(Local)')
    repository = adapter.bridge
    # Symbols are converted once up front instead of inside the inner loop.
    subjects, predicates, objects = [subjects, predicates, objects].collect do |terms|
      terms.collect { |term| term.instance_of?(Symbol) ? nil : term }
    end
    subjects.each do |subject|
      predicates.each do |predicate|
        objects.each do |object|
          repository.delete(subject, predicate, object, nil)
        end
      end
    end
    adapter.reset_cache()
    @result = []
    self
  end

  # Adds a keyword query to the expression.
  #   * a word containing 'http://' or 'imap://' is treated as a resource and
  #     matched in subject and object position;
  #   * ':predicate some text' searches "some text" in the objects of the
  #     first predicate matching the keyword "predicate";
  #   * anything else is a keyword search over objects.
  # Returns self.
  def search(word)
    if word.index('http://') || word.index('imap://')
      resource = RDFS::Resource.new(word)
      spo(resource, :p, :o)
      # spo(:s, resource, :o)
      spo(:s, :p, resource)
    elsif word[0, 1] == ':'
      # word[0, 1] works on every Ruby version; the former `word[0] == 58`
      # only matched on Ruby 1.8, where String#[] returned a character code.
      tokens = word.split(' ')
      predicate = tokens.shift
      predicates = Query.new.distinct(:p).where(:s, :p, :o).keyword_where(:p, predicate.gsub(':', '')).execute
      text = tokens.join(' ')
      # NOTE(review): predicates[0] is nil when no predicate matched - confirm
      # how Query#where is expected to treat a nil predicate.
      matches = Query.new.distinct(:s, :o).where(:s, predicates[0], :o).keyword_where(:o, text).execute
      @result = @result | matches.collect { |s, o| [s, predicates[0], o] }
    else
      @result = @result | Query.new.distinct(:s, :p, :o).where(:s, :p, :o).keyword_where(:o, word).execute
    end
    self
  end

  # Loads the given URI through the bridge of the 'EXPLORATOR(Local)'
  # adapter, resets that adapter's cache and unions into the result the
  # triples queried with the resource as fourth argument of Query#where.
  # Returns self.
  def go(uri)
    adapters = ConnectionPool.adapters.select { |adapter| adapter.title == 'EXPLORATOR(Local)' }
    uri = RDFS::Resource.new(uri)
    adapters.first().bridge.loaduri(uri.uri, false, 'rdf')
    adapters.first().reset_cache()
    @result = @result | Query.new.distinct(:s, :p, :o).where(:s, :p, :o, RDFS::Resource.new(uri)).execute
    self
  end

  # Unions into the result every triple matching keyword k in subject,
  # predicate or object position (three queries, in that order).
  # Returns self.
  def keyword(k)
    [:s, :p, :o].each do |position|
      @result = @result | Query.new.distinct(:s, :p, :o).where(:s, :p, :o).keyword_where(position, k).execute
    end
    self
  end

  # Wrapper for the ActiveRDF Query class. Executes one query and returns an
  # array of unique [subject, predicate, object] triples.
  # Each of s, p, o must be a single term; a Symbol marks a variable.
  #
  # The triples have to be rebuilt from the query rows because only the
  # variable positions are selected:
  #   two variables: x ? ?   ? x ?   ? ? x
  #   one variable:  x y ?   x ? y   ? x y
  #   no variable:   an ASK is executed instead of a SELECT, and the triple
  #                  is returned only when the answer contains 'true'.
  #
  # r selects a "pivot": when r is :p or :o the query additionally matches
  # (pivot, :x, :y) and the returned triples are (pivot, x, y).
  #
  # When Thread.current[:query_retrieve_label_and_type] is set, the label and
  # type of each triple subject are also selected and merged into
  # Thread.current[:label_type_cache], keyed by the subject's uri.
  def query(s, p, o, r = nil)
    q = Query.new
    ask = false
    variables = []
    with_labels = Thread.current[:query_retrieve_label_and_type]
    pivot_name = r.to_s
    pivot_mode = (pivot_name == 'p' || pivot_name == 'o')
    pivot = nil

    if pivot_mode
      pivot_symbol = pivot_name.to_sym
      pivot_term = (pivot_symbol == :p ? p : o)
      pivot = to_resource(pivot_term, pivot_symbol)
      if pivot_term.instance_of?(Symbol)
        variables << pivot_symbol
        q.distinct(pivot_symbol)
      end
      variables << :x
      variables << :y
      q.distinct(:x, :y)
      q.distinct(:label_s, :type_s) if with_labels
      q.where(to_resource(s, :s), to_resource(p, :p), to_resource(o, :o)).where(pivot, :x, :y)
      q.optional(pivot, RDFS::label, :label_s).optional(pivot, RDF::type, :type_s) if with_labels
    else
      variables << :s if s.instance_of?(Symbol)
      variables << :p if p.instance_of?(Symbol)
      variables << :o if o.instance_of?(Symbol)
      if variables.empty?
        q.ask()
        ask = true
      else
        variables.each { |variable| q.distinct(variable) }
        q.distinct(:label_s, :type_s) if with_labels
      end
      subject = to_resource(s, :s)
      q.where(subject, to_resource(p, :p), to_resource(o, :o))
      q.optional(subject, RDFS::label, :label_s).optional(subject, RDF::type, :type_s) if with_labels
    end

    variables |= [:label_s, :type_s] if with_labels
    values = q.execute

    # Column of each triple position inside a result row (nil = not selected).
    if pivot_mode
      idx_s = variables.index(pivot_name.to_sym)
      idx_p = variables.index(:x)
      idx_o = variables.index(:y)
    else
      idx_s = variables.index(:s)
      idx_p = variables.index(:p)
      idx_o = variables.index(:o)
    end

    if with_labels
      cache = {}
      label_key = RDFS::label
      type_key = RDF::type
      idx_label = variables.index(:label_s)
      idx_type = variables.index(:type_s)
    end

    triples = []
    unless ask
      values.each do |row|
        triple = []
        if !idx_s.nil?
          triple << cell_of(row, idx_s)
        elsif pivot_mode
          # The pivot was a concrete term, so it is the subject of (pivot, x, y).
          # (Previously this fell back to the s argument, typically the :s symbol.)
          triple << pivot
        else
          triple << to_resource(s, :s)
        end
        triple << (idx_p.nil? ? to_resource(p, :p) : cell_of(row, idx_p))
        triple << (idx_o.nil? ? to_resource(o, :o) : cell_of(row, idx_o))

        if with_labels
          entry = (cache[triple[0].uri] ||= {})
          (entry[label_key] ||= []) << row[idx_label] unless row[idx_label].nil?
          (entry[type_key] ||= []) << row[idx_type] unless row[idx_type].nil?
        end

        triples << triple
      end
    end

    if with_labels
      Thread.current[:label_type_cache] = cache if Thread.current[:label_type_cache].nil?
      Thread.current[:label_type_cache].merge!(cache)
    end

    if ask && !values.index('true').nil?
      triples << [to_resource(s, :s), to_resource(p, :p), to_resource(o, :o)]
    end

    triples.uniq
  end

  # The code below does the same as the method query above. However, it uses
  # a filter and is less efficient.
  #  def query(s,p,o,r=nil)
  #    q = Query.new
  #    if r.to_s == :p.to_s
  #      #  q.distinct(:p,:x,:y).where(:s,:p,:o).where(:p,:x,:y).optional(:p,RDFS::label,:label).sort(' ?p ')
  #      q.distinct(:p,:x,:y).where(:s,:p,:o).where(:p,:x,:y)
  #    elsif r.to_s == :o.to_s
  #      # q.distinct(:o,:x,:y).where(:s,:p,:o).where(:o,:x,:y).optional(:o,RDFS::label,:label).sort(' ?o ')
  #      q.distinct(:o,:x,:y).where(:s,:p,:o).where(:o,:x,:y)
  #    else
  #      #  q.distinct(:s,:p,:o).where(:s,:p,:o).optional(:s,RDFS::label,:label).sort(' ?s ')
  #      q.distinct(:s,:p,:o).where(:s,:p,:o)
  #    end
  #    q.filter(to_filter(s,:s)).filter(to_filter(p,:p)).filter(to_filter(o,:o))
  #    #    q.sort(' ?label ')
  #    q.execute
  #  end
  #  def to_filter(value,symbol)
  #    if value == symbol
  #      nil
  #    else
  #      str =''
  #
  #      if !isLiteral(value)
  #        str = '?' + symbol.to_s + ' = ' + value.to_s
  #      else
  #        str = "str(?"+symbol.to_s+")='"+value.to_s+"'"
  #      end
  #      str
  #    end
  #  end

  # Union: adds the operand's triples to the result.
  # s - the s of an (s, p, o) triple pattern, a set id, an Array of triples
  #     or a SemanticExpression instance
  # p - p of the triple pattern
  # o - o of the triple pattern
  # r - the position of the triple that should be returned
  # Returns self.
  def union(s, p = nil, o = nil, r = nil)
    @result = @result | operand_triples(s, p, o, r)
    self
  end

  # Intersection: keeps the triples of the result whose SUBJECT also occurs
  # as a subject in the operand (it is not an intersection between triples).
  # Parameters as in #union. Returns self.
  def intersection(s, p = nil, o = nil, r = nil)
    subjects = operand_triples(s, p, o, r).collect { |subject, _p, _o| subject }
    # compact: rejected triples used to be left behind as nil entries.
    @result = @result.collect do |subject, predicate, object|
      [subject, predicate, object] if subjects.include?(subject)
    end.compact
    self
  end

  # Difference: drops the triples of the result whose SUBJECT occurs as a
  # subject in the operand (it is not a difference between triples).
  # Parameters as in #union. Returns self.
  def difference(s, p = nil, o = nil, r = nil)
    subjects = operand_triples(s, p, o, r).collect { |subject, _p, _o| subject }
    # compact: rejected triples used to be left behind as nil entries.
    @result = @result.collect do |subject, predicate, object|
      [subject, predicate, object] unless subjects.include?(subject)
    end.compact
    self
  end

  # Applies a filter to the result of the expression. The SPARQL filter is
  # not used; exp is Ruby source appended to '@result.' and evaluated on the
  # Ruby objects, because some adapters might never implement filters.
  # If the evaluation fails (including a syntax error in exp) the result is
  # left untouched. A non-Array outcome is wrapped as [outcome.to_s].
  # Returns self.
  #
  # SECURITY: exp is passed to eval; it must never come from an untrusted
  # source without validation.
  def filter(exp)
    begin
      @result = eval('@result.' + exp)
    rescue StandardError, SyntaxError
      # SyntaxError is not a StandardError, so it has to be named explicitly.
      return self
    end
    @result = [@result.to_s] unless @result.instance_of?(Array)
    self
  end

  # Deletes the triple (s, p, o) through the ConnectionPool write adapter.
  def delete(s, p, o)
    ConnectionPool.write_adapter.delete(s, p, o)
  end

  # Tells whether r denotes a literal. r may be a literal, a Symbol, a BNode
  # or a resource; strings of the form "<...>" (resource) and "_:..."
  # (blank node) are not literals, and neither is nil.
  def isLiteral(r)
    return false if r.nil? || r.instance_of?(Symbol) || r.instance_of?(BNode) || r.instance_of?(RDFS::Resource)
    text = r.to_s
    # Prefix comparison instead of `r[0] != 60`, which only worked on Ruby 1.8.
    !(text[0, 1] == '<' || text[0, 2] == '_:')
  end

  # Returns position r (:s, :p or :o) of every triple in the result.
  def resources(r)
    project_triples(self.result, r)
  end

  # Placeholder: the body is empty.
  def auto_enable_or_find_and_add_repository(uri)
  end

  # Converts a term to what the ActiveRDF Query accepts: an RDFS::Resource,
  # a BNode, a literal or a Ruby Symbol.
  #   term == symbol       -> term (the variable itself)
  #   nil                  -> symbol.to_sym
  #   BNode/RDFS::Resource -> term
  #   String "_:id"        -> BNode.new("id")
  #   String "<...>"       -> RDFS::Resource.new(term)
  #   anything else        -> term (treated as a literal)
  def to_resource(term, symbol)
    return term if term == symbol
    return symbol.to_sym if term.nil?
    return term if term.instance_of?(BNode) || term.instance_of?(RDFS::Resource)
    return term unless term.is_a?(String)
    return BNode.new(term[2..-1]) if term[0, 2] == '_:'
    # Prefix comparison instead of `term[0] == 60`, which only worked on Ruby 1.8.
    return RDFS::Resource.new(term) if term[0, 1] == '<'
    term
  end

  private

  # Resolves the operand of union/intersection/difference to an array of
  # triples: the result of a SemanticExpression, an Array as given, the
  # elements of a stored set (or, when r is neither nil nor :s, the triples
  # describing position r of those elements), or the outcome of #query.
  def operand_triples(s, p, o, r)
    if s.instance_of?(SemanticExpression)
      s.result
    elsif s.instance_of?(Array)
      s
    elsif Thread.current[:application].is_set?(s)
      elements = Thread.current[:application].get(s).elements
      if r != nil && r != :s
        SemanticExpression.new.spo(project_triples(elements, r).uniq, :p, :o).result
      else
        elements
      end
    else
      query(s, p, o, r)
    end
  end

  # Evaluates r (expected to be :s, :p or :o) against every triple.
  # The block parameters must stay named s, p and o: eval resolves r by name.
  #
  # SECURITY: r is passed to eval; callers must not forward untrusted input.
  def project_triples(triples, r)
    triples.collect { |s, p, o| eval(r.to_s) }
  end

  # Value of one column of a query row; single-variable queries return the
  # bare value instead of an Array.
  def cell_of(row, index)
    row.instance_of?(Array) ? row[index] : row
  end
end
# Note: See TracBrowser for help on using the browser.