classyfire_api/lib/classyfire_api.rb

457 lines
16 KiB
Ruby
Raw Normal View History

2014-07-14 06:08:02 +01:00
require 'JSON'
require 'csv'
2014-07-14 06:08:02 +01:00
# Client helpers for the ClassyFire web service: submitting queries,
# fetching query/entity classifications, and bulk-processing input files.
module ClassyFireAPI
  URL = 'http://classyfire.wishartlab.com'

  # Result formats understood by the query and entity endpoints.
  SUPPORTED_FORMATS = %w[json sdf csv].freeze
  # Seconds to wait between two batches of bulk submissions.
  SLEEPING_TIME = 120
  # Number of parts submitted before the first pause in
  # submit_query_input_in_chunks, and in every batch after it.
  INITIAL_BATCH_SIZE = 60
  FOLLOW_UP_BATCH_SIZE = 80
  # Entries per query and queries per batch for random-subset submissions.
  RANDOM_SUBSET_SLICE_LENGTH = 100
  RANDOM_SUBSET_BATCH_SIZE = 5
  # Location of the ChemAxon converter used to derive InChIKeys.
  MOLCONVERT = '/Applications/ChemAxon/JChem/bin/molconvert'

  # Submits a ClassyFire query.
  #
  # @param label [String] the label of the query.
  # @param input [String] the input of the query.
  # @param type [String] the query type (default 'STRUCTURE').
  # @return [Object, nil] whatever RestClient.post returns (the callers in this
  #   module parse it as JSON and read its 'id'), or nil when the server answers
  #   with one of the rescued error statuses.
  def ClassyFireAPI.submit_query(label,input, type='STRUCTURE')
    payload = {:label => label, :query_input => input, :query_type => type}.to_json
    RestClient.post "#{URL}/queries", payload, :accept => :json, :content_type => :json
  rescue RestClient::BadRequest, RestClient::InternalServerError, RestClient::GatewayTimeout,
         RestClient::RequestTimeout, RestClient::UnprocessableEntity
    nil
  end

  # Retrieves the classification results for a given query.
  #
  # @param query_id [Integer] the ID of the query.
  # @param format [String, Symbol] the result format: json, csv or sdf (case-insensitive).
  # @return [Object, nil] the response in the requested format, the error
  #   response for a rescued HTTP error, or nil for an unsupported format.
  def ClassyFireAPI.get_query(query_id,format="json")
    fetch_resource('queries', query_id, format)
  end

  # Retrieves the classification results for a given entity.
  #
  # @param inchikey [String] the InChIKey of the entity, with or without the 'InChIKey=' prefix.
  # @param format [String, Symbol] the result format: json, csv or sdf (case-insensitive).
  # @return [Object, nil] the response in the requested format, the error
  #   response for a rescued HTTP error, or nil for an unsupported format.
  def ClassyFireAPI.get_entity_classification(inchikey,format="json")
    inchikey_id = inchikey.to_s.gsub('InChIKey=','')
    fetch_resource('entities', inchikey_id, format)
  end

  # Retrieves the status of a query.
  #
  # @param query_id [Integer] the ID of the query.
  # @return [Object, nil] the response of the status endpoint, or nil when the
  #   request fails (the error message is written to stderr).
  def ClassyFireAPI.query_status(query_id)
    RestClient.get "#{URL}/queries/#{query_id}/status.json", :accept => :json
  rescue StandardError => e
    $stderr.puts e.message
    nil
  end

  # Takes a tab-separated file and submits the contained structures in chunks of a given size.
  #
  # For 'STRUCTURE' or 'IUPAC_NAME' query types, each line must contain either
  # 1) only a structural representation: SMILES or InChI for the 'STRUCTURE' query type,
  #    or an IUPAC name for the 'IUPAC_NAME' query type;
  # 2) a tab-separated pair of an ID and the corresponding structure representation.
  #
  # For the 'FASTA' query type, just submit the query as a standard FASTA text.
  #
  # Parts are numbered from 1. The first INITIAL_BATCH_SIZE parts are submitted at once,
  # the remaining ones in batches of FOLLOW_UP_BATCH_SIZE, pausing SLEEPING_TIME seconds
  # between batches.
  #
  # @param input_file [String] the path to the input file.
  # @param slice_length [Integer] the maximum number of entries for each query input.
  # @param start [Integer] the 1-based number of the first part to submit
  #   (values below 1 are treated as 1).
  # @param type [String] the query type, 'STRUCTURE' (default), 'IUPAC_NAME' or 'FASTA'.
  # @return [Array] the ids of the queries that were submitted successfully.
  def ClassyFireAPI.submit_query_input_in_chunks(input_file,slice_length, start, type='STRUCTURE')
    @start_time = Time.now
    absolute_path = File.expand_path(input_file)
    subdivised_groups = read_query_entries(absolute_path).uniq.each_slice(slice_length).to_a
    puts "nr of subdivised_groups: #{subdivised_groups.length}"
    puts subdivised_groups[0]

    # Only part numbers that really exist are scheduled, each exactly once.
    pending_parts = ([start, 1].max..subdivised_groups.length).to_a
    part_batches = []
    part_batches << pending_parts.shift(INITIAL_BATCH_SIZE) unless pending_parts.empty?
    part_batches.concat(pending_parts.each_slice(FOLLOW_UP_BATCH_SIZE).to_a)

    base_title = File.basename(absolute_path).split(".")[0] + "_yannick"
    batches = part_batches.map do |part_numbers|
      part_numbers.map { |part| ["#{base_title}_part_#{part}", subdivised_groups[part - 1]] }
    end
    submit_in_batches(batches, type, @start_time)
  end

  # Takes each regular file in a folder and submits the contained structures in chunks
  # of a given size (see submit_query_input_in_chunks for the expected file format).
  # Sub-directories and '.DS_Store' files are skipped.
  #
  # @param folder [String] the path to the folder.
  # @param slice_length [Integer] the maximum number of entries for each query input.
  # @param type [String] the query type, 'STRUCTURE' (default), 'IUPAC_NAME' or 'FASTA'.
  # @return [nil]
  def ClassyFireAPI.submit_queries_from_directory(folder,slice_length,type="STRUCTURE")
    unless File.directory?(folder)
      $stderr.puts "#{folder} is not a folder."
      return nil
    end

    Dir.foreach(folder) do |filename|
      path = File.join(folder, filename)
      next if filename == ".DS_Store" || !File.file?(path)
      puts "Filename: #{filename}"
      # Every file is submitted from its first part onwards.
      ClassyFireAPI.submit_query_input_in_chunks(path, slice_length, 1, type)
    end
    nil
  end

  # Takes a tab-separated file and submits randomly selected structures in chunks
  # of RANDOM_SUBSET_SLICE_LENGTH entries (see submit_query_input_in_chunks for the
  # expected file format). Queries are sent in batches of RANDOM_SUBSET_BATCH_SIZE,
  # pausing SLEEPING_TIME seconds between batches.
  #
  # @param tab_separated_input_file [String] the path to the input file.
  # @param size [Integer] the number of entries to select at random; when the file
  #   holds fewer entries, all of them are submitted.
  # @param type [String] the query type, 'STRUCTURE' (default), 'IUPAC_NAME' or 'FASTA'.
  # @return [Array] the ids of the queries that were submitted successfully.
  def ClassyFireAPI.submit_random_subset_of_query_input_in_chunks(tab_separated_input_file,size, type='STRUCTURE')
    @start_time = Time.now
    absolute_path = File.expand_path(tab_separated_input_file)
    # Array#sample never returns more elements than available, so this terminates
    # for any size and for an empty file.
    random_subset = read_query_entries(absolute_path).sample([size, 0].max)
    subdivised_groups = random_subset.each_slice(RANDOM_SUBSET_SLICE_LENGTH).to_a

    base_title = File.basename(absolute_path).split(".")[0] + "_yannick"
    jobs = subdivised_groups.each_with_index.map do |group, index|
      ["#{base_title}_part_#{index + 1}", group]
    end
    submit_in_batches(jobs.each_slice(RANDOM_SUBSET_BATCH_SIZE).to_a, type, @start_time)
  end

  # Reads a tab-separated file, submits every line as a query and writes the first
  # entity of each query result to a JSON document. The result is fetched right
  # after the submission; entries whose lookup fails or yields no entity are
  # reported on stderr and left out of the document.
  #
  # Lines with a single column are submitted as-is under that value as label; lines
  # with two columns are submitted whole with the first column as label. Other
  # lines are ignored.
  #
  # @param input [String] path to the input file.
  # @param output [String] path to the output file.
  # @return [nil]
  def ClassyFireAPI.retrieve_classification(input,output)
    queries = {}
    File.foreach(File.expand_path(input)) do |line|
      stripped = line.strip
      columns = stripped.split("\t")
      if columns.length == 1
        queries[columns[0]] = columns[0]
      elsif columns.length == 2
        queries[columns[0]] = stripped
      end
    end
    puts queries.length

    entities = []
    queries.each_with_index do |(key, query_input), index|
      puts index + 1
      puts "#{key} :: #{query_input}"
      begin
        submission = JSON.parse(submit_query(key, query_input))
        result = JSON.parse(get_query(submission["id"], "json"))
        entity = result["entities"][0]
        if entity.nil?
          $stderr.puts "#{key} :: no entity returned"
        else
          entities << entity
        end
      rescue StandardError => e
        $stderr.puts "#{key} :: #{e.message}"
      end
    end
    write_entities_json(output, entities)
  end

  # Reads a tab-separated file and uses the structure representation (last column)
  # to retrieve each structure's classification from ClassyFire in JSON format.
  # Every retrieved entity gets the first column of its line as 'identifier'.
  #
  # @param input [String] path to the input file.
  # @param output [String] path to the output file.
  # @return [nil]
  def ClassyFireAPI.retrieve_entities_json(input,output)
    structures = read_identified_structures(input)
    puts structures.length

    entities = []
    structures.each_with_index do |(key, structure), index|
      puts index + 1
      begin
        inchikey = structure_to_inchikey(structure)
        entity = JSON.parse(ClassyFireAPI.get_entity_classification(inchikey, "json"))
        entity['identifier'] = key
        entities << entity
        direct_parent = entity['direct_parent']
        puts "#{key} :: RETURN NIL" if direct_parent.nil? || direct_parent['name'].nil?
      rescue StandardError => e
        $stderr.puts "#{key} :: #{e.message}"
      end
    end
    write_entities_json(output, entities)
  end

  # Reads a tab-separated file and uses the structure representation (last column)
  # to retrieve each structure's classification from ClassyFire in SDF format.
  # For every record found, the identifier (first column) is written followed by
  # the SDF text without its first line.
  #
  # @param input [String] path to the input file.
  # @param output [String] path to the output file.
  # @return [nil]
  def ClassyFireAPI.retrieve_entities_sdf(input,output)
    structures = read_identified_structures(input)
    puts structures.length

    File.open(output, 'w') do |f_output|
      structures.each_with_index do |(key, structure), index|
        puts index + 1
        begin
          inchikey = structure_to_inchikey(structure)
          sdf = ClassyFireAPI.get_entity_classification(inchikey, "sdf")
          if sdf.include?("The page you were looking for doesn't exist")
            puts "The page you were looking for doesn't exist"
          elsif !sdf.empty?
            record = sdf.split("\n")[1..-1].join("\n")
            puts record
            f_output.puts "#{key}\n"
            f_output.puts record
          end
        rescue StandardError => e
          $stderr.puts "#{key} :: #{e.message}"
        end
      end
    end
    nil
  end

  # Fetches "<URL>/<collection>/<id>.<format>" with the matching Accept type.
  # Returns nil for an unsupported format and the error response for the
  # rescued HTTP errors.
  def ClassyFireAPI.fetch_resource(collection, id, format)
    extension = format.to_s.downcase
    return nil unless SUPPORTED_FORMATS.include?(extension)
    RestClient.get "#{URL}/#{collection}/#{id}.#{extension}", :accept => extension.to_sym
  rescue RestClient::ResourceNotFound, RestClient::InternalServerError,
         RestClient::GatewayTimeout, RestClient::RequestTimeout => e
    e.response
  end

  # Reads a tab-separated file into query entries: "structure" for one-column
  # lines, "id\tstructure" (first two columns) otherwise; blank lines are skipped.
  def ClassyFireAPI.read_query_entries(path)
    entries = File.readlines(path).map do |line|
      columns = line.strip.split("\t")
      if columns.length == 1
        "#{columns[0]}"
      elsif columns.length >= 2
        "#{columns[0]}\t#{columns[1]}"
      end
    end
    entries.compact
  end

  # Reads a tab-separated file into a Hash mapping the first column of each
  # non-blank line to its last column.
  def ClassyFireAPI.read_identified_structures(input)
    structures = {}
    File.foreach(File.expand_path(input)) do |line|
      columns = line.strip.split("\t")
      next if columns.empty?
      structures[columns[0]] = columns[-1]
    end
    structures
  end

  # Converts a structure representation into an InChIKey with molconvert.
  # The command is run without a shell, so the structure text is never
  # interpreted as shell syntax.
  def ClassyFireAPI.structure_to_inchikey(structure)
    IO.popen([MOLCONVERT, 'inchikey', '-s', structure.to_s]) { |io| io.read }.strip
  end

  # Submits batches of [title, entries] jobs, pausing between two batches.
  # Returns the ids of the queries that were accepted.
  def ClassyFireAPI.submit_in_batches(batches, type, started_at)
    query_ids = []
    batches.each_with_index do |batch, index|
      pause_between_batches(started_at) unless index.zero?
      batch.each do |title, entries|
        puts "submitting #{title}"
        begin
          response = submit_query(title, entries.join("\n"), type)
          if response.nil?
            $stderr.puts "submission of #{title} failed"
          else
            query_ids << JSON.parse(response)['id']
          end
        rescue StandardError => e
          $stderr.puts e.message
          $stderr.puts e.backtrace.inspect
        end
      end
    end
    query_ids
  end

  # Sleeps SLEEPING_TIME seconds, logging the elapsed time before and after.
  def ClassyFireAPI.pause_between_batches(started_at)
    puts "Going to sleep at #{Time.now - started_at} for #{SLEEPING_TIME} s."
    sleep(SLEEPING_TIME)
    puts "Waking up at #{Time.now - started_at}"
  end

  # Writes the entities to +output+ as a JSON document labelled with the output path.
  def ClassyFireAPI.write_entities_json(output, entities)
    document = {
      "id" => 1,
      "label" => output,
      "classification_status" => "Done",
      "entities" => entities
    }
    File.open(output, 'w') { |file| file.print document.to_json }
    nil
  end

  private_class_method :fetch_resource, :read_query_entries, :read_identified_structures,
                       :structure_to_inchikey, :submit_in_batches, :pause_between_batches,
                       :write_entities_json
end