Updated the function submit_query_input_in_chunks by reducing the number of initial jobs and the default slice length, and by adding an if block to control the submission of the first batch of jobs
parent 0bb37b1178
commit 7cca77380b
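In rough terms, the method now submits a small first batch of query chunks, pauses, and then works through the remaining chunks batch by batch. A minimal sketch of that flow, using placeholder names (submit_in_batches, chunks) and the new default-style values rather than the gem's actual code:

def submit_in_batches(chunks, initial_nr_of_jobs: 2, sleeping_time: 60)
  i = 0                                  # 0-indexed here for simplicity; the real method counts from 'start'
  if i < initial_nr_of_jobs              # the new guard: only send a first batch when jobs remain
    while i < initial_nr_of_jobs && i < chunks.length
      puts "submitting part #{i + 1}"    # submit_query(...) is called here in the real method
      i += 1
    end
    sleep(sleeping_time)                 # throttle before the remaining batches
  end
  while i < chunks.length                # remaining chunks, initial_nr_of_jobs at a time
    (i...(i + initial_nr_of_jobs)).each do |k|
      break if k >= chunks.length
      puts "submitting part #{k + 1}"
      i += 1
    end
    sleep(sleeping_time)
  end
end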
Gemfile (2 changed lines)
@@ -1,5 +1,5 @@
 source "https://rubygems.org"
-#ruby-2.3.0-dev
+#ruby-2.3.0

 gem 'rest-client'
 gem 'yard'
@@ -150,68 +150,78 @@ module ClassyFireAPI
   # @param: slice_length [Integer] The maximum number of entries for each query input (the whole file
   # is fragmented into n part of #slice_length entries each).
   # @param: start [Integer] The starting index. Submit framgments from the index 'start'.
-  def ClassyFireAPI.submit_query_input_in_chunks(input_file,slice_length=1000, start=1, type='STRUCTURE')
+  def ClassyFireAPI.submit_query_input_in_chunks(input_file,slice_length=10, start=1, type='STRUCTURE')
     @start_time = Time.now
     absolute_path = File.expand_path(input_file)
     f = File.open(absolute_path, 'r')
     input = []


     lines = File.readlines(absolute_path)
+    puts lines.length, lines[0]
     i = 0
     lines.uniq.each do |line|
       i += 1
       sline = line.strip.split("\t")
       if sline.length == 1
         input <<"#{sline[0]}"
-      elsif sline.length == 2
+      elsif sline.length >= 2
         #ID\tSMILES (OR INCHI, OR VALID IUPAC NAME)
         input <<"#{sline[0]}\t#{sline[1]}"
       end
       # input <<"#{sline[0]}"
     end
+    # puts "=============",input.length, input[0]
     query_ids = []
     subdivised_groups = input.uniq.each_slice(slice_length).to_a
     puts "nr of subdivised_groups: #{subdivised_groups.length}"
     # puts subdivised_groups[0]
-    sleeping_time = 240
-    initial_nr_of_jobs = 30
+    sleeping_time = 60
+    initial_nr_of_jobs = 2
     i = start

-    while i < initial_nr_of_jobs
+    if i < initial_nr_of_jobs
+    while i < initial_nr_of_jobs

       title = File.basename(absolute_path).split(".")[0] + "_yannick" + "_part_#{i}"

       if i <= subdivised_groups.length
         puts "\n\n\n\n---------------------- -----------"
         begin
           puts "submitting #{title}"
           # puts subdivised_groups[i-1].join("\n")
           q = submit_query(title,subdivised_groups[i-1].join("\n"),type)
-          # puts q
+          puts JSON.parse(q)['id']
           query_ids << JSON.parse(q)['id']
+          sleep(10)
         rescue Exception => e
           puts e.message
           puts e.backtrace.inspect
         end
         i = i + 1
       else
         break
       end
+      query_ids
     end

     puts "Going to sleep at #{Time.now - @start_time} for #{sleeping_time} s."
     sleep(sleeping_time)
     puts "Waking up at #{Time.now - @start_time}"
+    end


     while i >= initial_nr_of_jobs && i < subdivised_groups.length
       k = 0
-      for k in (i..(i + initial_nr_of_jobs))
+      for k in (i...(i + initial_nr_of_jobs))
         title = File.basename(absolute_path).split(".")[0] + "_yannick" + "_part_#{k}"
         i = i + 1
         begin
           puts "submitting #{title}"
           q = submit_query(title,subdivised_groups[k-1].join("\n"),type)
+          puts JSON.parse(q)['id']
+          query_ids << JSON.parse(q)['id']
+          sleep(10)
         rescue Exception => e
           puts e.message
           puts e.backtrace.inspect
@@ -222,6 +232,7 @@ module ClassyFireAPI
       sleep(sleeping_time)
       puts "Waking up at #{Time.now - @start_time}"
     end

   end

   # Takes each file in a folder, and submit the contained structures in bluks of a given size.
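For reference, the updated method could be invoked as sketched below, assuming the gem is installed and that its require path is classyfireapi (an assumption), with compounds.tsv standing in for a hypothetical tab-separated input file of ID and SMILES pairs:

require 'classyfireapi'   # assumed require path; adjust to however the library is actually loaded

# With the new defaults this slices the file into chunks of 10 entries and submits
# them in small batches, sleeping 60 s between batches.
ClassyFireAPI.submit_query_input_in_chunks('compounds.tsv')

# Equivalent explicit form:
# ClassyFireAPI.submit_query_input_in_chunks('compounds.tsv', 10, 1, 'STRUCTURE')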