Updated the function submit_query_input_in_chunks by reducing the number of initial jobs and the default slice length, and by adding an if block to control the submission of the first batch of jobs

This commit is contained in:
yandj 2019-03-27 18:45:24 -04:00
parent 0bb37b1178
commit 7cca77380b
2 changed files with 58 additions and 47 deletions

View File

@ -1,5 +1,5 @@
source "https://rubygems.org" source "https://rubygems.org"
#ruby-2.3.0-dev #ruby-2.3.0
gem 'rest-client' gem 'rest-client'
gem 'yard' gem 'yard'

View File

@ -150,34 +150,37 @@ module ClassyFireAPI
# @param: slice_length [Integer] The maximum number of entries for each query input (the whole file # @param: slice_length [Integer] The maximum number of entries for each query input (the whole file
# is fragmented into n part of #slice_length entries each). # is fragmented into n part of #slice_length entries each).
# @param: start [Integer] The starting index. Submit framgments from the index 'start'. # @param: start [Integer] The starting index. Submit framgments from the index 'start'.
def ClassyFireAPI.submit_query_input_in_chunks(input_file,slice_length=1000, start=1, type='STRUCTURE') def ClassyFireAPI.submit_query_input_in_chunks(input_file,slice_length=10, start=1, type='STRUCTURE')
@start_time = Time.now @start_time = Time.now
absolute_path = File.expand_path(input_file) absolute_path = File.expand_path(input_file)
f = File.open(absolute_path, 'r') f = File.open(absolute_path, 'r')
input = [] input = []
lines = File.readlines(absolute_path) lines = File.readlines(absolute_path)
puts lines.length, lines[0]
i = 0 i = 0
lines.uniq.each do |line| lines.uniq.each do |line|
i += 1 i += 1
sline = line.strip.split("\t") sline = line.strip.split("\t")
if sline.length == 1 if sline.length == 1
input <<"#{sline[0]}" input <<"#{sline[0]}"
elsif sline.length == 2 elsif sline.length >= 2
#ID\tSMILES (OR INCHI, OR VALID IUPAC NAME) #ID\tSMILES (OR INCHI, OR VALID IUPAC NAME)
input <<"#{sline[0]}\t#{sline[1]}" input <<"#{sline[0]}\t#{sline[1]}"
end end
# input <<"#{sline[0]}" # input <<"#{sline[0]}"
end end
# puts "=============",input.length, input[0]
query_ids = [] query_ids = []
subdivised_groups = input.uniq.each_slice(slice_length).to_a subdivised_groups = input.uniq.each_slice(slice_length).to_a
puts "nr of subdivised_groups: #{subdivised_groups.length}" puts "nr of subdivised_groups: #{subdivised_groups.length}"
# puts subdivised_groups[0] # puts subdivised_groups[0]
sleeping_time = 240 sleeping_time = 60
initial_nr_of_jobs = 30 initial_nr_of_jobs = 2
i = start i = start
if i < initial_nr_of_jobs
while i < initial_nr_of_jobs while i < initial_nr_of_jobs
title = File.basename(absolute_path).split(".")[0] + "_yannick" + "_part_#{i}" title = File.basename(absolute_path).split(".")[0] + "_yannick" + "_part_#{i}"
@ -188,8 +191,9 @@ module ClassyFireAPI
puts "submitting #{title}" puts "submitting #{title}"
# puts subdivised_groups[i-1].join("\n") # puts subdivised_groups[i-1].join("\n")
q = submit_query(title,subdivised_groups[i-1].join("\n"),type) q = submit_query(title,subdivised_groups[i-1].join("\n"),type)
# puts q puts JSON.parse(q)['id']
query_ids << JSON.parse(q)['id'] query_ids << JSON.parse(q)['id']
sleep(10)
rescue Exception => e rescue Exception => e
puts e.message puts e.message
puts e.backtrace.inspect puts e.backtrace.inspect
@ -198,20 +202,26 @@ module ClassyFireAPI
else else
break break
end end
query_ids
end end
puts "Going to sleep at #{Time.now - @start_time} for #{sleeping_time} s." puts "Going to sleep at #{Time.now - @start_time} for #{sleeping_time} s."
sleep(sleeping_time) sleep(sleeping_time)
puts "Waking up at #{Time.now - @start_time}" puts "Waking up at #{Time.now - @start_time}"
end
while i >= initial_nr_of_jobs && i < subdivised_groups.length while i >= initial_nr_of_jobs && i < subdivised_groups.length
k = 0 k = 0
for k in (i..(i + initial_nr_of_jobs)) for k in (i...(i + initial_nr_of_jobs))
title = File.basename(absolute_path).split(".")[0] + "_yannick" + "_part_#{k}" title = File.basename(absolute_path).split(".")[0] + "_yannick" + "_part_#{k}"
i = i + 1 i = i + 1
begin begin
puts "submitting #{title}" puts "submitting #{title}"
q = submit_query(title,subdivised_groups[k-1].join("\n"),type) q = submit_query(title,subdivised_groups[k-1].join("\n"),type)
puts JSON.parse(q)['id']
query_ids << JSON.parse(q)['id']
sleep(10)
rescue Exception => e rescue Exception => e
puts e.message puts e.message
puts e.backtrace.inspect puts e.backtrace.inspect
@ -222,6 +232,7 @@ module ClassyFireAPI
sleep(sleeping_time) sleep(sleeping_time)
puts "Waking up at #{Time.now - @start_time}" puts "Waking up at #{Time.now - @start_time}"
end end
end end
# Takes each file in a folder, and submit the contained structures in bluks of a given size. # Takes each file in a folder, and submit the contained structures in bluks of a given size.