Updated the function submit_query_input_in_chunks by reducing the number of initial jobs and the default slice length, and by adding an if block to control the submission of the first batch of jobs

This commit is contained in:
yandj 2019-03-27 18:45:24 -04:00
parent 0bb37b1178
commit 7cca77380b
2 changed files with 58 additions and 47 deletions

View File

@ -1,5 +1,5 @@
source "https://rubygems.org"
#ruby-2.3.0-dev
#ruby-2.3.0
gem 'rest-client'
gem 'yard'

View File

@ -150,34 +150,37 @@ module ClassyFireAPI
# @param: slice_length [Integer] The maximum number of entries for each query input (the whole file
# is fragmented into n parts of #slice_length entries each).
# @param: start [Integer] The starting index. Submit fragments from the index 'start'.
def ClassyFireAPI.submit_query_input_in_chunks(input_file,slice_length=1000, start=1, type='STRUCTURE')
def ClassyFireAPI.submit_query_input_in_chunks(input_file,slice_length=10, start=1, type='STRUCTURE')
@start_time = Time.now
absolute_path = File.expand_path(input_file)
f = File.open(absolute_path, 'r')
input = []
lines = File.readlines(absolute_path)
puts lines.length, lines[0]
i = 0
lines.uniq.each do |line|
i += 1
sline = line.strip.split("\t")
if sline.length == 1
input <<"#{sline[0]}"
elsif sline.length == 2
elsif sline.length >= 2
#ID\tSMILES (OR INCHI, OR VALID IUPAC NAME)
input <<"#{sline[0]}\t#{sline[1]}"
end
# input <<"#{sline[0]}"
end
# puts "=============",input.length, input[0]
query_ids = []
subdivised_groups = input.uniq.each_slice(slice_length).to_a
puts "nr of subdivised_groups: #{subdivised_groups.length}"
# puts subdivised_groups[0]
sleeping_time = 240
initial_nr_of_jobs = 30
sleeping_time = 60
initial_nr_of_jobs = 2
i = start
if i < initial_nr_of_jobs
while i < initial_nr_of_jobs
title = File.basename(absolute_path).split(".")[0] + "_yannick" + "_part_#{i}"
@ -188,8 +191,9 @@ module ClassyFireAPI
puts "submitting #{title}"
# puts subdivised_groups[i-1].join("\n")
q = submit_query(title,subdivised_groups[i-1].join("\n"),type)
# puts q
puts JSON.parse(q)['id']
query_ids << JSON.parse(q)['id']
sleep(10)
rescue Exception => e
puts e.message
puts e.backtrace.inspect
@ -198,20 +202,26 @@ module ClassyFireAPI
else
break
end
query_ids
end
puts "Going to sleep at #{Time.now - @start_time} for #{sleeping_time} s."
sleep(sleeping_time)
puts "Waking up at #{Time.now - @start_time}"
end
while i >= initial_nr_of_jobs && i < subdivised_groups.length
k = 0
for k in (i..(i + initial_nr_of_jobs))
for k in (i...(i + initial_nr_of_jobs))
title = File.basename(absolute_path).split(".")[0] + "_yannick" + "_part_#{k}"
i = i + 1
begin
puts "submitting #{title}"
q = submit_query(title,subdivised_groups[k-1].join("\n"),type)
puts JSON.parse(q)['id']
query_ids << JSON.parse(q)['id']
sleep(10)
rescue Exception => e
puts e.message
puts e.backtrace.inspect
@ -222,6 +232,7 @@ module ClassyFireAPI
sleep(sleeping_time)
puts "Waking up at #{Time.now - @start_time}"
end
end
# Takes each file in a folder, and submits the contained structures in batches of a given size.