From 7cca77380b9f3026cfc719201c307c33ade65d8b Mon Sep 17 00:00:00 2001 From: yandj Date: Wed, 27 Mar 2019 18:45:24 -0400 Subject: [PATCH] Updated the function submit_query_input_in_chunks, but reducing the number of initial jobs, the default slice length, and adding an if block to control the submission of the first batch of jobs --- Gemfile | 2 +- lib/classyfire_api.rb | 103 +++++++++++++++++++++++------------------- 2 files changed, 58 insertions(+), 47 deletions(-) diff --git a/Gemfile b/Gemfile index a43f9f6..f231a31 100644 --- a/Gemfile +++ b/Gemfile @@ -1,5 +1,5 @@ source "https://rubygems.org" -#ruby-2.3.0-dev +#ruby-2.3.0 gem 'rest-client' gem 'yard' diff --git a/lib/classyfire_api.rb b/lib/classyfire_api.rb index 782df44..606bf80 100644 --- a/lib/classyfire_api.rb +++ b/lib/classyfire_api.rb @@ -34,7 +34,7 @@ module ClassyFireAPI # # @param query_id [Integer] the ID of the query. # @param format [String] the format of the query (either JSON, CSV, or SDF) - # @return [Text] A text file displaying the classification results for + # @return [Text] A text file displaying the classification results for # the query's entities in the specified format. def ClassyFireAPI.get_query(query_id,format="json") begin @@ -53,13 +53,13 @@ module ClassyFireAPI e.response rescue RestClient::RequestTimeout => e e.response - end + end end # Return data for the TaxNode with ID chemontid. # # @param chemontid [String] the ChemOnt ID of the entity. - # @return [Text] A text displaying the classification results for the entity in the specified format. + # @return [Text] A text displaying the classification results for the entity in the specified format. # Use JSON.parse to get a the json object. def ClassyFireAPI.get_chemont_node(chemontid) chemont_id = chemontid.to_s.gsub("CHEMONTID:","C") @@ -86,7 +86,7 @@ module ClassyFireAPI begin if format == "json" RestClient.get "#{URL}/entities/#{fingerprint}.#{format}", :accept => :json - end + end rescue RestClient::ResourceNotFound => e e.response rescue RestClient::InternalServerError => e @@ -112,7 +112,7 @@ module ClassyFireAPI RestClient.get "#{URL}/entities/#{inchikey_id}.#{format}", :accept => :sdf elsif format == "csv" RestClient.get "#{URL}/entities/#{inchikey_id}.#{format}", :accept => :csv - end + end rescue RestClient::ResourceNotFound => e e.response rescue RestClient::InternalServerError => e @@ -138,82 +138,92 @@ module ClassyFireAPI end # Takes a tab-separated file and submit the contained structures in bulks of a given size - # + # # For 'STRUCTURE' or 'IUPAC_NAME'query types, each line must contain either # 1) Only a structural represenation: SMILES, InChI for the 'STRUCTURE' query_type or a IUPAC name # for the 'IUPAC NAME' query type. - # 2) a tab-separated pair of an ID and the corresponding sructure representation: SMILES, InChI for the + # 2) a tab-separated pair of an ID and the corresponding sructure representation: SMILES, InChI for the # 'STRUCTURE' query_type or a IUPAC name for the 'IUPAC NAME' query type. - # + # # For 'FASTA' query type, just submit the query as a standard FASTA text. # @param input_file [Text] The path to the input file. - # @param: slice_length [Integer] The maximum number of entries for each query input (the whole file + # @param: slice_length [Integer] The maximum number of entries for each query input (the whole file # is fragmented into n part of #slice_length entries each). # @param: start [Integer] The starting index. Submit framgments from the index 'start'. - def ClassyFireAPI.submit_query_input_in_chunks(input_file,slice_length=1000, start=1, type='STRUCTURE') + def ClassyFireAPI.submit_query_input_in_chunks(input_file,slice_length=10, start=1, type='STRUCTURE') @start_time = Time.now absolute_path = File.expand_path(input_file) f = File.open(absolute_path, 'r') input = [] + lines = File.readlines(absolute_path) + puts lines.length, lines[0] i = 0 lines.uniq.each do |line| i += 1 sline = line.strip.split("\t") if sline.length == 1 input <<"#{sline[0]}" - elsif sline.length == 2 + elsif sline.length >= 2 #ID\tSMILES (OR INCHI, OR VALID IUPAC NAME) input <<"#{sline[0]}\t#{sline[1]}" end # input <<"#{sline[0]}" end - + # puts "=============",input.length, input[0] query_ids = [] subdivised_groups = input.uniq.each_slice(slice_length).to_a puts "nr of subdivised_groups: #{subdivised_groups.length}" # puts subdivised_groups[0] - sleeping_time = 240 - initial_nr_of_jobs = 30 + sleeping_time = 60 + initial_nr_of_jobs = 2 i = start - while i < initial_nr_of_jobs + if i < initial_nr_of_jobs + while i < initial_nr_of_jobs - title = File.basename(absolute_path).split(".")[0] + "_yannick" + "_part_#{i}" + title = File.basename(absolute_path).split(".")[0] + "_yannick" + "_part_#{i}" - if i <= subdivised_groups.length - puts "\n\n\n\n---------------------- -----------" - begin - puts "submitting #{title}" - # puts subdivised_groups[i-1].join("\n") - q = submit_query(title,subdivised_groups[i-1].join("\n"),type) - # puts q - query_ids << JSON.parse(q)['id'] - rescue Exception => e - puts e.message - puts e.backtrace.inspect + if i <= subdivised_groups.length + puts "\n\n\n\n---------------------- -----------" + begin + puts "submitting #{title}" + # puts subdivised_groups[i-1].join("\n") + q = submit_query(title,subdivised_groups[i-1].join("\n"),type) + puts JSON.parse(q)['id'] + query_ids << JSON.parse(q)['id'] + sleep(10) + rescue Exception => e + puts e.message + puts e.backtrace.inspect + end + i = i + 1 + else + break end - i = i + 1 - else - break + query_ids end + puts "Going to sleep at #{Time.now - @start_time} for #{sleeping_time} s." + sleep(sleeping_time) + puts "Waking up at #{Time.now - @start_time}" end - puts "Going to sleep at #{Time.now - @start_time} for #{sleeping_time} s." - sleep(sleeping_time) - puts "Waking up at #{Time.now - @start_time}" + while i >= initial_nr_of_jobs && i < subdivised_groups.length k = 0 - for k in (i..(i + initial_nr_of_jobs)) + for k in (i...(i + initial_nr_of_jobs)) title = File.basename(absolute_path).split(".")[0] + "_yannick" + "_part_#{k}" i = i + 1 begin puts "submitting #{title}" q = submit_query(title,subdivised_groups[k-1].join("\n"),type) - rescue Exception => e - puts e.message + puts JSON.parse(q)['id'] + query_ids << JSON.parse(q)['id'] + sleep(10) + rescue Exception => e + puts e.message puts e.backtrace.inspect end end @@ -222,6 +232,7 @@ module ClassyFireAPI sleep(sleeping_time) puts "Waking up at #{Time.now - @start_time}" end + end # Takes each file in a folder, and submit the contained structures in bluks of a given size. @@ -229,13 +240,13 @@ module ClassyFireAPI # For 'STRUCTURE' or 'IUPAC_NAME'query types, each line must contain either # 1) Only a structural represenation: SMILES, InChI for the 'STRUCTURE' query_type or a IUPAC name # for the 'IUPAC NAME' query type. - # 2) a tab-separated pair of an ID and the corresponding sructure representation: SMILES, InChI for the + # 2) a tab-separated pair of an ID and the corresponding sructure representation: SMILES, InChI for the # 'STRUCTURE' query_type or a IUPAC name for the 'IUPAC NAME' query type. - # + # # For 'FASTA' query type, just submit the query as a standard FASTA text. # @param: input_file [String] The path to the folder. - # @param: slice_length [Integer] The maximum number of entries for each query input (each file - # is fragmented into n part of #slice_length entries each), 'integer' + # @param: slice_length [Integer] The maximum number of entries for each query input (each file + # is fragmented into n part of #slice_length entries each), 'integer' # @param type [String] the query_type 'STRUCTURE' (default) or 'IUPAC_NAME' or 'FASTA' def ClassyFireAPI.submit_queries_from_directory(folder,slice_length,type="STRUCTURE") if File.directory?(folder) @@ -249,7 +260,7 @@ module ClassyFireAPI end - # Reads a tab separated file, and use the structure representation + # Reads a tab separated file, and use the structure representation #to retrieve the strutcure's classification from ClassyFire. # # @param input [String] path to the input file. @@ -311,7 +322,7 @@ module ClassyFireAPI # res += "," rescue Exception => e e.message - end + end end res += "]}" f_output.print res @@ -328,7 +339,7 @@ module ClassyFireAPI h = Hash.new directory = absolute_path.split('/')[0...-1].join("/") f_output = File.new(output, 'w') - puts + puts res = String.new res += "{" @@ -347,7 +358,7 @@ module ClassyFireAPI puts i # puts "#{key} :: #{h[key]}" begin - inchikey = %x(obabel -:"#{h[key]}" -oinchikey).strip.split("\t")[0] + inchikey = %x(obabel -:"#{h[key]}" -oinchikey).strip.split("\t")[0] # puts inchikey qr = JSON.parse(ClassyFireAPI.get_entity_classification(inchikey,format="json")) qr['identifier'] = key @@ -371,7 +382,7 @@ module ClassyFireAPI # res += "," rescue Exception => e e.message - end + end end res += "]}" f_output.print res @@ -439,7 +450,7 @@ module ClassyFireAPI end rescue Exception => e e.message - end + end end end end