Updated the function submit_query_input_in_chunks by reducing the number of initial jobs and the default slice length, and by adding an if block to control the submission of the first batch of jobs

This commit is contained in:
yandj 2019-03-27 18:45:24 -04:00
parent 0bb37b1178
commit 7cca77380b
2 changed files with 58 additions and 47 deletions

View File

@ -1,5 +1,5 @@
source "https://rubygems.org"
#ruby-2.3.0-dev
#ruby-2.3.0
gem 'rest-client'
gem 'yard'

View File

@ -34,7 +34,7 @@ module ClassyFireAPI
#
# @param query_id [Integer] the ID of the query.
# @param format [String] the format of the query (either JSON, CSV, or SDF)
# @return [Text] A text file displaying the classification results for
# @return [Text] A text file displaying the classification results for
# the query's entities in the specified format.
def ClassyFireAPI.get_query(query_id,format="json")
begin
@ -53,13 +53,13 @@ module ClassyFireAPI
e.response
rescue RestClient::RequestTimeout => e
e.response
end
end
end
# Return data for the TaxNode with ID chemontid.
#
# @param chemontid [String] the ChemOnt ID of the entity.
# @return [Text] A text displaying the classification results for the entity in the specified format.
# @return [Text] A text displaying the classification results for the entity in the specified format.
# Use JSON.parse to get the JSON object.
def ClassyFireAPI.get_chemont_node(chemontid)
chemont_id = chemontid.to_s.gsub("CHEMONTID:","C")
@ -86,7 +86,7 @@ module ClassyFireAPI
begin
if format == "json"
RestClient.get "#{URL}/entities/#{fingerprint}.#{format}", :accept => :json
end
end
rescue RestClient::ResourceNotFound => e
e.response
rescue RestClient::InternalServerError => e
@ -112,7 +112,7 @@ module ClassyFireAPI
RestClient.get "#{URL}/entities/#{inchikey_id}.#{format}", :accept => :sdf
elsif format == "csv"
RestClient.get "#{URL}/entities/#{inchikey_id}.#{format}", :accept => :csv
end
end
rescue RestClient::ResourceNotFound => e
e.response
rescue RestClient::InternalServerError => e
@ -138,82 +138,92 @@ module ClassyFireAPI
end
# Takes a tab-separated file and submits the contained structures in batches of a given size.
#
#
# For 'STRUCTURE' or 'IUPAC_NAME' query types, each line must contain either
# 1) Only a structural representation: SMILES, InChI for the 'STRUCTURE' query_type or an IUPAC name
# for the 'IUPAC_NAME' query type.
# 2) a tab-separated pair of an ID and the corresponding structure representation: SMILES, InChI for the
# 2) a tab-separated pair of an ID and the corresponding structure representation: SMILES, InChI for the
# 'STRUCTURE' query_type or an IUPAC name for the 'IUPAC_NAME' query type.
#
#
# For 'FASTA' query type, just submit the query as a standard FASTA text.
# @param input_file [Text] The path to the input file.
# @param slice_length [Integer] The maximum number of entries for each query input (the whole file
# @param slice_length [Integer] The maximum number of entries for each query input (the whole file
# is fragmented into n parts of #slice_length entries each).
# @param start [Integer] The starting index. Submit fragments from the index 'start'.
def ClassyFireAPI.submit_query_input_in_chunks(input_file,slice_length=1000, start=1, type='STRUCTURE')
def ClassyFireAPI.submit_query_input_in_chunks(input_file,slice_length=10, start=1, type='STRUCTURE')
@start_time = Time.now
absolute_path = File.expand_path(input_file)
f = File.open(absolute_path, 'r')
input = []
lines = File.readlines(absolute_path)
puts lines.length, lines[0]
i = 0
lines.uniq.each do |line|
i += 1
sline = line.strip.split("\t")
if sline.length == 1
input <<"#{sline[0]}"
elsif sline.length == 2
elsif sline.length >= 2
#ID\tSMILES (OR INCHI, OR VALID IUPAC NAME)
input <<"#{sline[0]}\t#{sline[1]}"
end
# input <<"#{sline[0]}"
end
# puts "=============",input.length, input[0]
query_ids = []
subdivised_groups = input.uniq.each_slice(slice_length).to_a
puts "nr of subdivised_groups: #{subdivised_groups.length}"
# puts subdivised_groups[0]
sleeping_time = 240
initial_nr_of_jobs = 30
sleeping_time = 60
initial_nr_of_jobs = 2
i = start
while i < initial_nr_of_jobs
if i < initial_nr_of_jobs
while i < initial_nr_of_jobs
title = File.basename(absolute_path).split(".")[0] + "_yannick" + "_part_#{i}"
title = File.basename(absolute_path).split(".")[0] + "_yannick" + "_part_#{i}"
if i <= subdivised_groups.length
puts "\n\n\n\n---------------------- -----------"
begin
puts "submitting #{title}"
# puts subdivised_groups[i-1].join("\n")
q = submit_query(title,subdivised_groups[i-1].join("\n"),type)
# puts q
query_ids << JSON.parse(q)['id']
rescue Exception => e
puts e.message
puts e.backtrace.inspect
if i <= subdivised_groups.length
puts "\n\n\n\n---------------------- -----------"
begin
puts "submitting #{title}"
# puts subdivised_groups[i-1].join("\n")
q = submit_query(title,subdivised_groups[i-1].join("\n"),type)
puts JSON.parse(q)['id']
query_ids << JSON.parse(q)['id']
sleep(10)
rescue Exception => e
puts e.message
puts e.backtrace.inspect
end
i = i + 1
else
break
end
i = i + 1
else
break
query_ids
end
puts "Going to sleep at #{Time.now - @start_time} for #{sleeping_time} s."
sleep(sleeping_time)
puts "Waking up at #{Time.now - @start_time}"
end
puts "Going to sleep at #{Time.now - @start_time} for #{sleeping_time} s."
sleep(sleeping_time)
puts "Waking up at #{Time.now - @start_time}"
while i >= initial_nr_of_jobs && i < subdivised_groups.length
k = 0
for k in (i..(i + initial_nr_of_jobs))
for k in (i...(i + initial_nr_of_jobs))
title = File.basename(absolute_path).split(".")[0] + "_yannick" + "_part_#{k}"
i = i + 1
begin
puts "submitting #{title}"
q = submit_query(title,subdivised_groups[k-1].join("\n"),type)
rescue Exception => e
puts e.message
puts JSON.parse(q)['id']
query_ids << JSON.parse(q)['id']
sleep(10)
rescue Exception => e
puts e.message
puts e.backtrace.inspect
end
end
@ -222,6 +232,7 @@ module ClassyFireAPI
sleep(sleeping_time)
puts "Waking up at #{Time.now - @start_time}"
end
end
# Takes each file in a folder, and submits the contained structures in batches of a given size.
@ -229,13 +240,13 @@ module ClassyFireAPI
# For 'STRUCTURE' or 'IUPAC_NAME' query types, each line must contain either
# 1) Only a structural representation: SMILES, InChI for the 'STRUCTURE' query_type or an IUPAC name
# for the 'IUPAC_NAME' query type.
# 2) a tab-separated pair of an ID and the corresponding structure representation: SMILES, InChI for the
# 2) a tab-separated pair of an ID and the corresponding structure representation: SMILES, InChI for the
# 'STRUCTURE' query_type or an IUPAC name for the 'IUPAC_NAME' query type.
#
#
# For 'FASTA' query type, just submit the query as a standard FASTA text.
# @param folder [String] The path to the folder.
# @param slice_length [Integer] The maximum number of entries for each query input (each file
# is fragmented into n parts of #slice_length entries each), 'integer'
# @param slice_length [Integer] The maximum number of entries for each query input (each file
# is fragmented into n parts of #slice_length entries each), 'integer'
# @param type [String] the query_type 'STRUCTURE' (default) or 'IUPAC_NAME' or 'FASTA'
def ClassyFireAPI.submit_queries_from_directory(folder,slice_length,type="STRUCTURE")
if File.directory?(folder)
@ -249,7 +260,7 @@ module ClassyFireAPI
end
# Reads a tab-separated file and uses the structure representation
# Reads a tab-separated file and uses the structure representation
# to retrieve the structure's classification from ClassyFire.
#
# @param input [String] path to the input file.
@ -311,7 +322,7 @@ module ClassyFireAPI
# res += ","
rescue Exception => e
e.message
end
end
end
res += "]}"
f_output.print res
@ -328,7 +339,7 @@ module ClassyFireAPI
h = Hash.new
directory = absolute_path.split('/')[0...-1].join("/")
f_output = File.new(output, 'w')
puts
puts
res = String.new
res += "{"
@ -347,7 +358,7 @@ module ClassyFireAPI
puts i
# puts "#{key} :: #{h[key]}"
begin
inchikey = %x(obabel -:"#{h[key]}" -oinchikey).strip.split("\t")[0]
inchikey = %x(obabel -:"#{h[key]}" -oinchikey).strip.split("\t")[0]
# puts inchikey
qr = JSON.parse(ClassyFireAPI.get_entity_classification(inchikey,format="json"))
qr['identifier'] = key
@ -371,7 +382,7 @@ module ClassyFireAPI
# res += ","
rescue Exception => e
e.message
end
end
end
res += "]}"
f_output.print res
@ -439,7 +450,7 @@ module ClassyFireAPI
end
rescue Exception => e
e.message
end
end
end
end
end