461 lines
15 KiB
Ruby
461 lines
15 KiB
Ruby
require 'json'
require 'csv'
require 'rest-client'
# require 'openbabel'
# require 'node_resource.rb'
|
|
|
|
module ClassyFireAPI
|
|
# Base URL of the ClassyFire web service; the request paths used by the
# methods of this module are appended to it.
URL = 'http://classyfire.wishartlab.com'
|
|
# Posts a new query to ClassyFire as a JSON document.
#
# @param label [String] the label of the query.
# @param input [String] the input of the query.
# @param type [String] the type of the query.
# @return [Object, nil] whatever RestClient.post returns for the request (pass
#   it to JSON.parse to read the id and other attributes of the query), or nil
#   when the request fails with one of the HTTP errors rescued below.
def ClassyFireAPI.submit_query(label,input, type='STRUCTURE')
  payload = {:label => label, :query_input => input, :query_type => type}.to_json
  RestClient.post "#{URL}/queries", payload, :accept => :json, :content_type => :json
rescue RestClient::BadRequest,
       RestClient::InternalServerError,
       RestClient::GatewayTimeout,
       RestClient::RequestTimeout,
       RestClient::UnprocessableEntity
  # The error response is not propagated: callers only ever receive nil.
  nil
end
|
|
|
|
# Fetches the classification results of a previously submitted query.
#
# @param query_id [Integer] the ID of the query.
# @param format [String] the result format: "json", "sdf" or "csv".
# @return [Object, nil] the response returned by RestClient.get; the response
#   attached to the error when the request fails with one of the HTTP errors
#   rescued below; nil when format is none of the three supported strings.
def ClassyFireAPI.get_query(query_id,format="json")
  # Maps each supported format string to the symbol sent as the Accept type.
  accept = { "json" => :json, "sdf" => :sdf, "csv" => :csv }[format]
  return nil if accept.nil?

  RestClient.get "#{URL}/queries/#{query_id}.#{format}", :accept => accept
rescue RestClient::ResourceNotFound,
       RestClient::InternalServerError,
       RestClient::GatewayTimeout,
       RestClient::RequestTimeout => e
  e.response
end
|
|
|
|
# Fetches the JSON description of the TaxNode identified by a ChemOnt ID.
#
# @param chemontid [String] the ChemOnt ID of the node; every "CHEMONTID:"
#   occurrence in it is replaced by "C" before the request is made.
# @return [Object] the response returned by RestClient.get (use JSON.parse to
#   obtain the JSON object), or the response attached to the error when the
#   request fails with one of the HTTP errors rescued below.
def ClassyFireAPI.get_chemont_node(chemontid)
  node_id = chemontid.to_s.gsub("CHEMONTID:","C")
  RestClient.get "#{URL}/tax_nodes/#{node_id}.json", :accept => :json
rescue RestClient::ResourceNotFound,
       RestClient::InternalServerError,
       RestClient::GatewayTimeout,
       RestClient::RequestTimeout => e
  e.response
end
|
|
|
|
# Fetches the classification results stored for a sequence.
#
# @param fingerprint [String] the fingerprint of the sequence (generated from
#   the sequence using Digest::MD5).
# @param format [String] the result format; only "json" is supported.
# @return [Object, nil] the response returned by RestClient.get (use JSON.parse
#   to obtain the JSON object); the response attached to the error when the
#   request fails with one of the HTTP errors rescued below; nil when format
#   is not "json".
def ClassyFireAPI.get_sequence_classification(fingerprint,format="json")
  return nil unless format == "json"

  RestClient.get "#{URL}/entities/#{fingerprint}.#{format}", :accept => :json
rescue RestClient::ResourceNotFound,
       RestClient::InternalServerError,
       RestClient::GatewayTimeout,
       RestClient::RequestTimeout => e
  e.response
end
|
|
|
|
# Fetches the classification results stored for a chemical entity.
#
# @param inchikey [String] the InChIKey of the entity; any "InChIKey=" text in
#   it is removed before the request is made.
# @param format [String] the result format: "json", "sdf" or "csv".
# @return [Object, nil] the response returned by RestClient.get; the response
#   attached to the error when the request fails with one of the HTTP errors
#   rescued below; nil when format is none of the three supported strings.
def ClassyFireAPI.get_entity_classification(inchikey,format="json")
  bare_key = inchikey.to_s.gsub('InChIKey=','')
  # Maps each supported format string to the symbol sent as the Accept type.
  accept = { "json" => :json, "sdf" => :sdf, "csv" => :csv }[format]
  return nil if accept.nil?

  RestClient.get "#{URL}/entities/#{bare_key}.#{format}", :accept => accept
rescue RestClient::ResourceNotFound,
       RestClient::InternalServerError,
       RestClient::GatewayTimeout,
       RestClient::RequestTimeout => e
  e.response
end
|
|
|
|
|
|
# Retrieves the status document of a query.
#
# @param query_id [Integer] the ID of the query.
# @return [Object, nil] the response returned by RestClient.get for the
#   query's status.json resource, or nil when the request raises an error
#   (the error message is then written to $stderr).
def ClassyFireAPI.query_status(query_id)
  RestClient.get "#{URL}/queries/#{query_id}/status.json", :accept => :json
rescue StandardError => e
  # StandardError rather than Exception, so that Interrupt (Ctrl-C) and
  # SystemExit are not swallowed and turned into a nil result.
  $stderr.puts e.message
  nil
end
|
|
|
|
# Takes a tab-separated file and submits the contained structures in chunks
# of a given size.
#
# For the 'STRUCTURE' or 'IUPAC_NAME' query types, each line must contain either
# 1) only a structural representation: SMILES or InChI for the 'STRUCTURE'
#    query type, or an IUPAC name for the 'IUPAC_NAME' query type; or
# 2) a tab-separated pair of an ID and the corresponding structure
#    representation (only the first two fields of a line are used).
#
# Blank lines are skipped and duplicate entries are submitted only once.
#
# NOTE(review): the input is always split line by line, whatever the query
# type, so a multi-line 'FASTA' record may end up spread over several chunks.
# Confirm before relying on this method for FASTA input.
#
# Submissions are throttled: a 10 s pause follows every successful submission
# and a 60 s pause follows every batch of two chunks.
#
# @param input_file [String] the path to the input file.
# @param slice_length [Integer] the maximum number of entries per query (the
#   file is split into parts of slice_length entries each).
# @param start [Integer] the 1-based number of the first part to submit;
#   values below 1 are treated as 1.
# @param type [String] the query type passed on to submit_query.
# @return [Array] the ids of the queries that were submitted successfully.
def ClassyFireAPI.submit_query_input_in_chunks(input_file,slice_length=10, start=1, type='STRUCTURE')
  started_at    = Time.now
  absolute_path = File.expand_path(input_file)
  lines         = File.readlines(absolute_path)
  puts lines.length, lines[0]

  # Keep at most "ID<TAB>structure" per line, drop blank lines and duplicates.
  entries = lines.map { |line| line.strip.split("\t") }
                 .reject { |fields| fields.empty? }
                 .map { |fields| fields.first(2).join("\t") }
                 .uniq

  groups = entries.each_slice(slice_length).to_a
  puts "nr of subdivised_groups: #{groups.length}"

  sleeping_time  = 60 # seconds to wait after each batch
  jobs_per_batch = 2  # number of chunks submitted between two long pauses
  base_title     = "#{File.basename(absolute_path).split('.')[0]}_yannick"
  first_part     = [start, 1].max
  query_ids      = []

  # Every part number from first_part to the last one is visited exactly once.
  (first_part..groups.length).each_slice(jobs_per_batch) do |batch|
    batch.each do |part|
      title = "#{base_title}_part_#{part}"
      begin
        puts "submitting #{title}"
        response = submit_query(title, groups[part - 1].join("\n"), type)
        if response.nil?
          # submit_query returns nil for the HTTP errors it rescues.
          $stderr.puts "submission of #{title} failed"
          next
        end
        query_id = JSON.parse(response)['id']
        puts query_id
        query_ids << query_id
        sleep(10)
      rescue StandardError => e
        # A failing chunk is reported and skipped; the remaining ones still run.
        puts e.message
        puts e.backtrace.inspect
      end
    end

    puts "Going to sleep at #{Time.now - started_at} for #{sleeping_time} s."
    sleep(sleeping_time)
    puts "Waking up at #{Time.now - started_at}"
  end

  query_ids
end
|
|
|
|
# Takes each file in a folder and submits the contained structures in chunks
# of a given size, by calling submit_query_input_in_chunks on every file.
#
# Entries whose name starts with "." and sub-directories are skipped. See
# submit_query_input_in_chunks for the expected layout of each file.
#
# @param folder [String] the path to the folder.
# @param slice_length [Integer] the maximum number of entries for each query
#   (each file is split into parts of slice_length entries each).
# @param type [String] the query type: 'STRUCTURE' (default), 'IUPAC_NAME'
#   or 'FASTA'.
# @return [nil] a message is written to $stderr when folder is not a directory.
def ClassyFireAPI.submit_queries_from_directory(folder,slice_length,type="STRUCTURE")
  unless File.directory?(folder)
    $stderr.puts "#{folder} is not a folder."
    return nil
  end

  Dir.foreach(folder) do |filename|
    path = File.join(folder, filename)
    # The directory test needs the full path: a bare filename would be
    # resolved against the current working directory.
    next if filename.start_with?(".") || File.directory?(path)

    puts "Filename: #{filename}"
    # The third positional argument is the starting part number, so it is
    # given explicitly (1) to make `type` land in the fourth position.
    ClassyFireAPI.submit_query_input_in_chunks(path, slice_length, 1, type)
  end
end
|
|
|
|
|
|
# Reads a tab-separated file, submits each structure to ClassyFire as a new
# query and writes the first entity of every query result to a JSON file.
#
# Each input line holds either a single structure representation, or an ID
# and a structure separated by a tab; lines with any other number of fields
# are ignored. The first field is used as the query label, and a later line
# with the same first field replaces an earlier one.
#
# The output is one JSON object with the keys "id" (always 1), "label" (the
# output path), "classification_status" (always "Done") and "entities".
#
# NOTE(review): the results are requested 0.2 s after the submission; whether
# the classification is complete by then is not checked here.
#
# @param input [String] path to the input file.
# @param output [String] path to the output file.
# @return [nil]
def ClassyFireAPI.retrieve_classification(input,output)
  absolute_path = File.expand_path(input)

  structures = {}
  File.foreach(absolute_path) do |line|
    fields = line.strip.split("\t")
    case fields.length
    when 1 then structures[fields[0]] = fields[0]
    when 2 then structures[fields[0]] = line.strip
    end
  end
  puts structures.length

  entities = []
  # Every key is processed exactly once, the last one included.
  structures.each_with_index do |(key, query_input), index|
    puts index + 1
    puts "#{key} :: #{query_input}"
    begin
      submitted = JSON.parse(submit_query(key, query_input))
      sleep(0.2)
      result = JSON.parse(get_query(submitted["id"], "json"))
      entities << result["entities"][0]
    rescue StandardError => e
      # A failing entry is reported and skipped so the output stays valid JSON.
      $stderr.puts "#{key} :: #{e.message}"
    end
  end

  document = {
    "id" => 1,
    "label" => output,
    "classification_status" => "Done",
    "entities" => entities
  }
  # Serialising the whole document escapes the label and cannot leave a
  # dangling comma; the block form closes (and flushes) the file.
  File.open(output, 'w') { |f_output| f_output.print document.to_json }
  nil
end
|
|
|
|
# Reads a tab-separated file and uses the structure representation on each
# line to retrieve the structure's classification from ClassyFire in JSON
# format.
#
# The first field of a line is the identifier and the last field is the
# structure (a line with one field uses it for both); blank lines are
# skipped. Each structure is passed to the external `obabel` program with the
# `-oinchikey` option, and the first tab-separated token it prints is used as
# the InChIKey for get_entity_classification.
#
# The output is one JSON object with the keys "id" (always 1), "label" (the
# output path), "classification_status" (always "Done") and "entities"; each
# entity carries the identifier from the input file under "identifier".
#
# @param input [String] path to the input file.
# @param output [String] path to the output file.
# @return [nil]
def ClassyFireAPI.retrieve_entities_json(input,output)
  absolute_path = File.expand_path(input)

  structures = {}
  File.foreach(absolute_path) do |line|
    fields = line.strip.split("\t")
    next if fields.empty?

    structures[fields[0]] = fields[-1]
  end
  puts
  puts structures.length

  entities = []
  structures.each_with_index do |(key, structure), index|
    puts index + 1
    begin
      # Argument-array form: no shell is involved, so characters in the
      # structure string cannot be interpreted as shell syntax.
      obabel_output = IO.popen(["obabel", "-:#{structure}", "-oinchikey"]) { |io| io.read }
      inchikey = obabel_output.strip.split("\t")[0]
      if inchikey.nil?
        $stderr.puts "#{key} :: no InChIKey could be computed"
        next
      end

      entity = JSON.parse(ClassyFireAPI.get_entity_classification(inchikey, "json"))
      entity['identifier'] = key
      entities << entity
      puts "#{key} :: RETURN NIL" if (entity['direct_parent'] || {})['name'].nil?
    rescue StandardError => e
      # A failing entry is reported and skipped so the output stays valid JSON.
      $stderr.puts "#{key} :: #{e.message}"
    end
  end

  document = {
    "id" => 1,
    "label" => output,
    "classification_status" => "Done",
    "entities" => entities
  }
  # The block form closes (and flushes) the output file.
  File.open(output, 'w') { |f_output| f_output.print document.to_json }
  nil
end
|
|
|
|
|
|
|
|
# Reads a tab-separated file and uses the structure representation on each
# line to retrieve the structure's classification from ClassyFire in SDF
# format.
#
# The first field of a line is the identifier and the last field is the
# structure (a line with one field uses it for both); blank lines are
# skipped. Each structure is passed to the external `obabel` program with the
# `-oinchikey` option, and the first tab-separated token it prints is used as
# the InChIKey for get_entity_classification.
#
# For every non-empty result, the identifier is written to the output file,
# followed by the returned text without its first line. Results containing
# "The page you were looking for doesn't exist" are reported and not written.
#
# @param input [String] path to the input file.
# @param output [String] path to the output file.
# @return [nil]
def ClassyFireAPI.retrieve_entities_sdf(input,output)
  absolute_path = File.expand_path(input)

  structures = {}
  File.foreach(absolute_path) do |line|
    fields = line.strip.split("\t")
    next if fields.empty?

    structures[fields[0]] = fields[-1]
  end
  puts structures.length

  # The block form closes (and flushes) the output file, even on error.
  File.open(output, 'w') do |f_output|
    structures.each_with_index do |(key, structure), index|
      puts index + 1
      begin
        # Argument-array form: no shell is involved, so characters in the
        # structure string cannot be interpreted as shell syntax.
        obabel_output = IO.popen(["obabel", "-:#{structure}", "-oinchikey"]) { |io| io.read }
        inchikey = obabel_output.strip.split("\t")[0]
        if inchikey.nil?
          $stderr.puts "#{key} :: no InChIKey could be computed"
          next
        end

        sdf = ClassyFireAPI.get_entity_classification(inchikey, "sdf")
        if sdf.include?("The page you were looking for doesn't exist")
          puts "The page you were looking for doesn't exist"
        elsif !sdf.empty?
          # The first line of the record is replaced by the input identifier.
          record = sdf.split("\n").drop(1).join("\n")
          puts record
          f_output.puts "#{key}\n"
          f_output.puts record
        end
      rescue StandardError => e
        # A failing entry is reported and skipped; the remaining ones still run.
        $stderr.puts "#{key} :: #{e.message}"
      end
    end
  end
  nil
end
|
|
end
|
|
|
|
# Runs only when this file is executed directly rather than required.
# The body is currently empty, so executing the file performs no action.
if __FILE__ == $0
end
|