classyfire_api/lib/classyfire_api.rb

459 lines
15 KiB
Ruby
Raw Normal View History

2014-07-14 06:08:02 +01:00
require 'JSON'
require 'csv'
require 'rest-client'
2016-11-21 07:11:35 +00:00
# require 'openbabel'
# require 'node_resource.rb'
2014-07-14 06:08:02 +01:00
module ClassyFireAPI
URL = 'http://classyfire.wishartlab.com'
# Submits a ClassyFire query in a JSON format.
#
# @param label [String] the label of the query.
# @param input [String] the input of the query.
# @param type [String] the type of the query.
# @return [Hash] A Ruby Hash with the id (and other attributes) of the Query or nil
# if there is an error. Use JSON.parse to get a the json object.
def ClassyFireAPI.submit_query(label,input, type='STRUCTURE')
2014-07-14 06:08:02 +01:00
begin
q = RestClient.post URL+'/queries', {:label => label, :query_input => input, :query_type => type}.to_json, :accept => :json, :content_type => :json
2014-07-14 06:08:02 +01:00
rescue RestClient::BadRequest => e
e.response
rescue RestClient::InternalServerError => e
e.response
rescue RestClient::GatewayTimeout => e
e.response
rescue RestClient::RequestTimeout => e
e.response
rescue RestClient::UnprocessableEntity => e
e.response
2014-07-14 06:08:02 +01:00
end
q
2014-07-14 06:08:02 +01:00
end
# Retrieves the classification results for a given query.
#
# @param query_id [Integer] the ID of the query.
# @param format [String] the format of the query (either JSON, CSV, or SDF)
# @return [Text] A text file displaying the classification results for
# the query's entities in the specified format.
def ClassyFireAPI.get_query(query_id,format="json")
2014-07-14 06:08:02 +01:00
begin
if format == "json"
RestClient.get "#{URL}/queries/#{query_id}.json", :accept => :json
elsif format == "sdf"
RestClient.get "#{URL}/queries/#{query_id}.sdf", :accept => :sdf
elsif format == "csv"
RestClient.get "#{URL}/queries/#{query_id}.csv", :accept => :csv
end
2014-07-14 06:08:02 +01:00
rescue RestClient::ResourceNotFound => e
e.response
rescue RestClient::InternalServerError => e
e.response
rescue RestClient::GatewayTimeout => e
e.response
rescue RestClient::RequestTimeout => e
e.response
end
2014-07-14 06:08:02 +01:00
end
# Return data for the TaxNode with ID chemontid.
#
# @param chemontid [String] the ChemOnt ID of the entity.
# @return [Text] A text displaying the classification results for the entity in the specified format.
# Use JSON.parse to get a the json object.
def ClassyFireAPI.get_chemont_node(chemontid)
chemont_id = chemontid.to_s.gsub("CHEMONTID:","C")
2014-07-14 06:08:02 +01:00
begin
RestClient.get "#{URL}/tax_nodes/#{chemont_id}.json", :accept => :json
2014-07-14 06:08:02 +01:00
rescue RestClient::ResourceNotFound => e
e.response
rescue RestClient::InternalServerError => e
e.response
rescue RestClient::GatewayTimeout => e
e.response
rescue RestClient::RequestTimeout => e
e.response
2014-07-14 06:08:02 +01:00
end
end
# Retrieves the classification results for a given sequence.
#
# @param fingerprint [String] the fingerprint (generated from the sequence using Digest::MD5).
# @param format [String] the format of the query (Only JSON)
# @return [Text] A text displaying the classification results for the entity in the specified format.
# Use JSON.parse to get a the json object.
def ClassyFireAPI.get_sequence_classification(fingerprint,format="json")
begin
if format == "json"
RestClient.get "#{URL}/entities/#{fingerprint}.#{format}", :accept => :json
end
rescue RestClient::ResourceNotFound => e
e.response
rescue RestClient::InternalServerError => e
e.response
rescue RestClient::GatewayTimeout => e
e.response
rescue RestClient::RequestTimeout => e
e.response
end
end
# Retrieves the classification results for a given chemical entity.
#
# @param the InChIKey [String] of the entity
# @param The format [String] the format of the query (Only JSON)
# @return [Text] A text file displaying the classification results for the entity in the specified format.
def ClassyFireAPI.get_entity_classification(inchikey,format="json")
inchikey_id = inchikey.to_s.gsub('InChIKey=','')
begin
if format == "json"
RestClient.get "#{URL}/entities/#{inchikey_id}.#{format}", :accept => :json
elsif format == "sdf"
RestClient.get "#{URL}/entities/#{inchikey_id}.#{format}", :accept => :sdf
elsif format == "csv"
RestClient.get "#{URL}/entities/#{inchikey_id}.#{format}", :accept => :csv
end
rescue RestClient::ResourceNotFound => e
e.response
rescue RestClient::InternalServerError => e
e.response
rescue RestClient::GatewayTimeout => e
e.response
rescue RestClient::RequestTimeout => e
e.response
end
end
# Retrieves the status of a query
# @param query_id [Integer] the ID of the query
# @return [String] the query status, 'Done' or 'In progress', 'string'
2014-07-14 06:08:02 +01:00
def ClassyFireAPI.query_status(query_id)
begin
RestClient.get "#{URL}/queries/#{query_id}/status.json", :accept => :json
rescue Exception=>e
$stderr.puts e.message
nil
end
end
# Takes a tab-separated file and submit the contained structures in bulks of a given size
#
# For 'STRUCTURE' or 'IUPAC_NAME'query types, each line must contain either
# 1) Only a structural represenation: SMILES, InChI for the 'STRUCTURE' query_type or a IUPAC name
# for the 'IUPAC NAME' query type.
# 2) a tab-separated pair of an ID and the corresponding sructure representation: SMILES, InChI for the
# 'STRUCTURE' query_type or a IUPAC name for the 'IUPAC NAME' query type.
#
# For 'FASTA' query type, just submit the query as a standard FASTA text.
# @param input_file [Text] The path to the input file.
# @param: slice_length [Integer] The maximum number of entries for each query input (the whole file
# is fragmented into n part of #slice_length entries each).
# @param: start [Integer] The starting index. Submit framgments from the index 'start'.
def ClassyFireAPI.submit_query_input_in_chunks(input_file,slice_length=10, start=1, type='STRUCTURE')
@start_time = Time.now
absolute_path = File.expand_path(input_file)
f = File.open(absolute_path, 'r')
input = []
lines = File.readlines(absolute_path)
i = 0
lines.uniq.each do |line|
i += 1
sline = line.strip.split("\t")
if sline.length == 1
input <<"#{sline[0]}"
elsif sline.length >= 2
#ID\tSMILES (OR INCHI, OR VALID IUPAC NAME)
input <<"#{sline[0]}\t#{sline[1]}"
end
2016-11-21 07:11:35 +00:00
# input <<"#{sline[0]}"
end
# puts "=============",input.length, input[0]
query_ids = []
subdivised_groups = input.uniq.each_slice(slice_length).to_a
puts "nr of subdivised_groups: #{subdivised_groups.length}"
# puts subdivised_groups[0]
sleeping_time = 60
initial_nr_of_jobs = 2
i = start
if i < initial_nr_of_jobs
while i <= initial_nr_of_jobs
title = File.basename(absolute_path).split(".")[0] + "_part_#{i}"
if i <= subdivised_groups.length
puts "\n---------------------------------"
begin
puts "submitting #{title}"
# puts subdivised_groups[i-1].join("\n")
q = submit_query(title,subdivised_groups[i-1].join("\n"),type)
puts "Query ID: " + JSON.parse(q)['id'].to_s
query_ids << JSON.parse(q)['id']
sleep(20)
rescue Exception => e
puts e.message
puts e.backtrace.inspect
end
i = i + 1
else
break
2016-11-21 07:11:35 +00:00
end
end
puts "Going to sleep at #{Time.now - @start_time} for #{sleeping_time} s."
sleep(sleeping_time)
puts "Waking up at #{Time.now - @start_time}"
end
while i >= initial_nr_of_jobs && i < subdivised_groups.length
k = 0
for k in (i...(i + initial_nr_of_jobs))
title = File.basename(absolute_path).split(".")[0] + "_part_#{k}"
begin
puts "submitting #{title}"
q = submit_query(title,subdivised_groups[k-1].join("\n"),type)
puts "Query ID: " + JSON.parse(q)['id'].to_s
query_ids << JSON.parse(q)['id']
sleep(20)
rescue Exception => e
puts e.message
puts e.backtrace.inspect
end
i = i + 1
end
if i >= initial_nr_of_jobs && i < subdivised_groups.length
puts "Going to sleep at #{Time.now - @start_time} for #{sleeping_time} s."
sleep(sleeping_time)
puts "Waking up at #{Time.now - @start_time}"
end
end
puts "Done at #{Time.now - @start_time}"
end
# Takes each file in a folder, and submit the contained structures in bluks of a given size.
#
# For 'STRUCTURE' or 'IUPAC_NAME'query types, each line must contain either
# 1) Only a structural represenation: SMILES, InChI for the 'STRUCTURE' query_type or a IUPAC name
# for the 'IUPAC NAME' query type.
# 2) a tab-separated pair of an ID and the corresponding sructure representation: SMILES, InChI for the
# 'STRUCTURE' query_type or a IUPAC name for the 'IUPAC NAME' query type.
#
# For 'FASTA' query type, just submit the query as a standard FASTA text.
# @param: input_file [String] The path to the folder.
# @param: slice_length [Integer] The maximum number of entries for each query input (each file
# is fragmented into n part of #slice_length entries each), 'integer'
# @param type [String] the query_type 'STRUCTURE' (default) or 'IUPAC_NAME' or 'FASTA'
def ClassyFireAPI.submit_queries_from_directory(folder,slice_length,type="STRUCTURE")
if File.directory?(folder)
Dir.foreach(folder) do |filename|
puts "Filename: #{filename}"
2016-11-21 07:11:35 +00:00
ClassyFireAPI.submit_query_input_in_chunks(folder+"/"+filename,slice_length, type) unless filename[0] == "." || File.directory?(filenmae)
end
else
$stderr.puts "#{folder} is not a folder."
end
end
# Reads a tab separated file, and use the structure representation
#to retrieve the strutcure's classification from ClassyFire.
#
# @param input [String] path to the input file.
# @return [String] path to the output file.
def ClassyFireAPI.retrieve_classification(input,output)
absolute_path = File.expand_path(input)
f_input = File.open(absolute_path, 'r')
h = Hash.new
directory = absolute_path.split('/')[0...-1].join("/")
f_output = File.new(output, 'w')
res = String.new
res += "{"
res += '"id": 1,'
res += '"label":"' + output + '",' + '"classification_status":"Done",' + '"entities":['
f_input.each_line do |line|
sline = line.strip.split("\t")
if sline.length == 1
h[sline[0]] = sline[0]
elsif sline.length == 2
h[sline[0]] = line.strip
end
end
puts h.keys.uniq.length
if h.keys.length > 0
i = 1
h.keys.uniq[0..-1].each do |key|
puts i
puts "#{key} :: #{h[key]}"
begin
qs = submit_query(key,h[key])
qs_decoded = JSON.parse(qs)
qr = JSON.parse(get_query(qs_decoded["id"],format="json"))
res += qr["entities"][0].to_json
res += ","
i += 1
rescue Exception => e
e.message
end
end
key = h.keys[-1]
puts "#{key} :: #{h[key]}"
begin
qs = submit_query(key,h[key])
sleep(0.2)
qs_decoded = JSON.parse(qs)
qr = JSON.parse(get_query(qs_decoded["id"],format="json"))
# puts qr["entities"]
# sleep(0.2)
# f_output.print qr["entities"][0],"\n"
res += qr["entities"][0].to_json
# res += ","
rescue Exception => e
e.message
end
end
res += "]}"
f_output.print res
end
# Reads a tab separated file, and use the structure representation
# to retrieve the strutcure's classification from ClassyFire in a JSON format.
#
# @param input [String] path to the input file
# @return [String] path to the output file
def ClassyFireAPI.retrieve_entities_json(input,output)
absolute_path = File.expand_path(input)
f_input = File.open(absolute_path, 'r')
h = Hash.new
directory = absolute_path.split('/')[0...-1].join("/")
f_output = File.new(output, 'w')
puts
res = String.new
res += "{"
res += '"id": 1,'
res += '"label":"' + output + '",' + '"classification_status":"Done",' + '"entities":['
f_input.each_line do |line|
sline = line.strip.split("\t")
h[sline[0]] = sline[-1]
end
puts h.keys.uniq.length
if h.keys.length > 0
i = 1
h.keys.uniq[0...-1].each do |key|
puts i
# puts "#{key} :: #{h[key]}"
begin
inchikey = %x(obabel -:"#{h[key]}" -oinchikey).strip.split("\t")[0]
# puts inchikey
qr = JSON.parse(ClassyFireAPI.get_entity_classification(inchikey,format="json"))
qr['identifier'] = key
res += qr.to_json
res += ","
puts "#{key} :: RETURN NIL" if qr.nil? || qr['direct_parent']['name'].nil?
rescue Exception => e
e.message
end
i += 1
end
key = h.keys[-1]
# puts "#{key} :: #{h[key]}"
begin
inchikey = %x(obabel -:"#{h[key]}" -oinchikey).strip.split("\t")[0]
# puts inchikey
qr = JSON.parse(ClassyFireAPI.get_entity_classification(inchikey,format="json"))
qr['identifier'] = key
res += qr.to_json
puts "#{key} :: RETURN NIL" if qr.nil? || qr['direct_parent']['name'].nil?
# res += ","
rescue Exception => e
e.message
end
end
res += "]}"
f_output.print res
end
# Reads a tab separated file, and use the structure representation
# to retrieve the strutcure's classification from ClassyFire in a SDF format.
#
# @param input [String] path to the input file
# @return [String] path to the output file
def ClassyFireAPI.retrieve_entities_sdf(input,output)
absolute_path = File.expand_path(input)
f_input = File.open(absolute_path, 'r')
h = Hash.new
directory = absolute_path.split('/')[0...-1].join("/")
f_output = File.new(output, 'w')
res = String.new
f_input.each_line do |line|
sline = line.strip.split("\t")
h[sline[0]] = sline[-1]
end
puts h.keys.uniq.length
if h.keys.length > 0
i = 1
h.keys.uniq[0...-1].each do |key|
puts i
# puts "#{key} :: #{h[key]}"
begin
inchikey = %x(obabel -:"#{h[key]}" -oinchikey).strip.split("\t")[0]
# puts inchikey
qr = ClassyFireAPI.get_entity_classification(inchikey,format="sdf")
if qr.include?("The page you were looking for doesn't exist")
puts "The page you were looking for doesn't exist"
elsif qr.empty?
else
input = qr.split("\n")[1..-1].join("\n")
puts input
f_output.puts "#{key}\n"
f_output.puts input
end
rescue Exception => e
e.message
end
i += 1
end
key = h.keys[-1]
begin
inchikey = %x(obabel -:"#{h[key]}" -oinchikey).strip.split("\t")[0]
# puts inchikey
qr = ClassyFireAPI.get_entity_classification(inchikey,format="sdf")
if qr.include?("The page you were looking for doesn't exist")
puts "The page you were looking for doesn't exist"
elsif qr.empty?
else
input = qr.split("\n")[1..-1].join("\n")
puts input
f_output.puts "#{key}\n"
f_output.puts input
end
rescue Exception => e
e.message
end
2014-07-14 06:08:02 +01:00
end
end
end
if __FILE__ == $0
end