require 'json'
require 'csv'

# Client helpers for the ClassyFire web service: submitting queries, fetching
# query/entity classification results, and bulk helpers that read
# tab-separated input files.
#
# NOTE: every HTTP call goes through RestClient, which this file references
# but does not require; the rest-client gem must be loaded by the caller
# before any of these methods is invoked.
module ClassyFireAPI
  URL = 'http://classyfire.wishartlab.com'

  # Result formats accepted by get_query and get_entity_classification.
  RESULT_FORMATS = %w[json sdf csv].freeze

  # Directory entries that submit_queries_from_directory never submits.
  IGNORED_DIRECTORY_ENTRIES = ['.', '..', '.DS_Store'].freeze

  # The first batch of a chunked submission covers the parts numbered below 60.
  LAST_PART_OF_FIRST_BATCH = 59

  # Every later batch covers up to 81 consecutive parts.
  FOLLOW_UP_BATCH_SIZE = 81

  # Seconds to wait after each batch of submissions.
  BATCH_PAUSE_SECONDS = 120

  # Location of the ChemAxon molconvert executable used to derive InChIKeys.
  MOLCONVERT_PATH = '/Applications/ChemAxon/JChem/bin/molconvert'

  # Submits a ClassyFire query.
  #
  # @param label [String] the label of the query.
  # @param input [String] the input of the query.
  # @param type [String] the query type ('STRUCTURE' by default).
  # @return [Object, nil] the RestClient response of the POST, or nil when the
  #   request fails with one of the rescued RestClient errors.
  def ClassyFireAPI.submit_query(label, input, type = 'STRUCTURE')
    payload = { :label => label, :query_input => input, :query_type => type }.to_json
    RestClient.post("#{URL}/queries", payload, :accept => :json, :content_type => :json)
  rescue RestClient::BadRequest, RestClient::InternalServerError, RestClient::GatewayTimeout,
         RestClient::RequestTimeout, RestClient::UnprocessableEntity
    nil
  end

  # Retrieves the classification results for a given query.
  #
  # @param query_id [Integer] the ID of the query.
  # @param format [String] the result format: "json", "sdf" or "csv".
  # @return [Object, nil] the RestClient response; the error's response when
  #   one of the rescued RestClient errors is raised; nil for any other format.
  def ClassyFireAPI.get_query(query_id, format = "json")
    fetch_resource("queries/#{query_id}", format)
  end

  # Retrieves the classification results for a given entity.
  #
  # @param inchikey [String] the InChIKey of the entity; an "InChIKey=" prefix
  #   is removed before the request is built.
  # @param format [String] the result format: "json", "sdf" or "csv".
  # @return [Object, nil] same contract as get_query.
  def ClassyFireAPI.get_entity_classification(inchikey, format = "json")
    inchikey_id = inchikey.to_s.gsub('InChIKey=', '')
    fetch_resource("entities/#{inchikey_id}", format)
  end

  # Retrieves the status of a query.
  #
  # @param query_id [Integer] the ID of the query.
  # @return [Object, nil] the RestClient response of the status request, or
  #   nil (after printing the error message to stderr) when the request fails.
  def ClassyFireAPI.query_status(query_id)
    RestClient.get("#{URL}/queries/#{query_id}/status.json", :accept => :json)
  rescue StandardError => e
    $stderr.puts e.message
    nil
  end

  # Takes a tab-separated file and submits the contained entries in chunks of
  # a given size.
  #
  # Each line must contain either
  #   1) a single value (e.g. a SMILES, an InChI or an IUPAC name), or
  #   2) a tab-separated pair of an ID and the corresponding representation;
  #      any further columns are ignored.
  # Blank lines and duplicate entries are dropped.
  #
  # Parts are numbered from 1. The parts numbered below 60 are submitted
  # first, then batches of up to 81 parts; the method pauses for 120 seconds
  # after every batch.
  #
  # @param input_file [String] the path to the input file.
  # @param slice_length [Integer] the maximum number of entries per query.
  # @param start [Integer] the number of the first part to submit; values
  #   below 1 are treated as 1.
  # @param type [String] the query type ('STRUCTURE' by default).
  # @return [Array] the ids parsed from the responses of the submitted queries.
  def ClassyFireAPI.submit_query_input_in_chunks(input_file, slice_length, start, type = 'STRUCTURE')
    @start_time = Time.now
    absolute_path = File.expand_path(input_file)
    entries = File.readlines(absolute_path).map { |line| query_entry(line, true) }.compact.uniq
    subdivised_groups = entries.each_slice(slice_length).to_a
    puts "nr of subdivised_groups: #{subdivised_groups.length}"
    puts subdivised_groups[0]
    submit_groups_in_batches(absolute_path, subdivised_groups, start, type)
  end

  # Takes each regular file in a folder and submits its entries in chunks of
  # a given size (see submit_query_input_in_chunks for the line format).
  #
  # @param folder [String] the path to the folder.
  # @param slice_length [Integer] the maximum number of entries per query.
  # @param type [String] the query type ('STRUCTURE' by default).
  # @return [nil]
  def ClassyFireAPI.submit_queries_from_directory(folder, slice_length, type = "STRUCTURE")
    unless File.directory?(folder)
      $stderr.puts "#{folder} is not a folder."
      return nil
    end

    Dir.foreach(folder) do |filename|
      puts "Filename: #{filename}"
      next if IGNORED_DIRECTORY_ENTRIES.include?(filename)

      path = File.join(folder, filename)
      next unless File.file?(path)

      # Every file is submitted from its first part; `type` must go in the
      # fourth position, not in the `start` slot.
      submit_query_input_in_chunks(path, slice_length, 1, type)
    end
    nil
  end

  # Takes a tab-separated file and submits a random selection of its entries.
  #
  # Lines with a single value or with exactly two tab-separated columns are
  # used; all other lines are ignored. At most `size` distinct lines are
  # selected (fewer when the file holds fewer usable lines).
  #
  # NOTE(review): the middle of this method was missing from the reviewed
  # source. Chunking and batching below are reconstructed by analogy with
  # submit_query_input_in_chunks and the chunk size is an assumption;
  # confirm against the original implementation.
  #
  # @param tab_separated_input_file [String] the path to the input file.
  # @param size [Integer] the number of entries to select at random.
  # @param type [String] the query type ('STRUCTURE' by default).
  # @param slice_length [Integer] the maximum number of entries per query;
  #   defaults to `size`, i.e. the whole selection goes into one query.
  # @return [Array] the ids parsed from the responses of the submitted queries.
  def ClassyFireAPI.submit_random_subset_of_query_input_in_chunks(tab_separated_input_file, size,
                                                                  type = 'STRUCTURE', slice_length: size)
    @start_time = Time.now
    absolute_path = File.expand_path(tab_separated_input_file)
    entries = File.readlines(absolute_path).map { |line| query_entry(line, false) }.compact
    # Array#sample never returns more elements than exist, so an oversized
    # `size` can no longer loop forever looking for unused indexes.
    selection = entries.sample([size, 0].max)
    subdivised_groups = selection.empty? ? [] : selection.each_slice(slice_length).to_a
    submit_groups_in_batches(absolute_path, subdivised_groups, 1, type)
  end

  # Reads a tab-separated file, submits every entry as a query and writes the
  # first entity of each query result to a JSON document.
  #
  # A line with one column is used as both label and query input; a line with
  # two columns uses the first column as label and the whole line as query
  # input; other lines are ignored. Entries whose submission or retrieval
  # fails are reported on stderr and left out of the document.
  #
  # @param input [String] path to the input file.
  # @param output [String] path to the output file; also used as the
  #   document's "label".
  # @return [nil]
  def ClassyFireAPI.retrieve_classification(input, output)
    queries = {}
    File.foreach(File.expand_path(input)) do |line|
      stripped = line.strip
      fields = stripped.split("\t")
      case fields.length
      when 1 then queries[fields[0]] = fields[0]
      when 2 then queries[fields[0]] = stripped
      end
    end
    puts queries.length

    entities = []
    queries.each_with_index do |(key, query_input), index|
      puts index + 1
      puts "#{key} :: #{query_input}"
      entity = first_entity_json(key, query_input)
      entities << entity unless entity.nil?
    end
    write_entities_document(output, entities)
  end

  # Reads a tab-separated file and uses the structure representation (last
  # column) of every line to retrieve the entity's classification in JSON.
  # The first column is stored as the entity's "identifier". Entities that
  # cannot be retrieved are reported on stderr and left out of the document.
  #
  # @param input [String] path to the input file.
  # @param output [String] path to the output file; also used as the
  #   document's "label".
  # @return [nil]
  def ClassyFireAPI.retrieve_entities_json(input, output)
    structures = read_structures(input)
    puts structures.length

    entities = []
    structures.each_with_index do |(key, structure), index|
      puts index + 1
      entity = classified_entity_json(key, structure)
      entities << entity unless entity.nil?
    end
    write_entities_document(output, entities)
  end

  # Reads a tab-separated file and uses the structure representation (last
  # column) of every line to retrieve the entity's classification in SDF.
  # For every retrieved record the first column is written on its own line,
  # followed by the record without its first line.
  #
  # @param input [String] path to the input file.
  # @param output [String] path to the output file.
  # @return [nil]
  def ClassyFireAPI.retrieve_entities_sdf(input, output)
    structures = read_structures(input)
    puts structures.length

    File.open(output, 'w') do |sdf_file|
      structures.each_with_index do |(key, structure), index|
        puts index + 1
        begin
          inchikey = structure_to_inchikey(structure)
          response = ClassyFireAPI.get_entity_classification(inchikey, "sdf")
          if response.include?("The page you were looking for doesn't exist")
            puts "The page you were looking for doesn't exist"
          elsif !response.empty?
            record = response.split("\n")[1..-1].join("\n")
            puts record
            sdf_file.puts "#{key}\n"
            sdf_file.puts record
          end
        rescue StandardError => e
          $stderr.puts "#{key} :: #{e.message}"
        end
      end
    end
    nil
  end

  # Fetches "#{URL}/#{path}.#{format}" for one of the supported formats.
  def ClassyFireAPI.fetch_resource(path, format)
    return nil unless RESULT_FORMATS.include?(format)

    RestClient.get("#{URL}/#{path}.#{format}", :accept => format.to_sym)
  rescue RestClient::ResourceNotFound, RestClient::InternalServerError,
         RestClient::GatewayTimeout, RestClient::RequestTimeout => e
    e.response
  end

  # Turns one input line into a query entry ("value" or "id\tvalue"), or nil
  # when the line is blank or (unless extra columns are allowed) has more
  # than two columns.
  def ClassyFireAPI.query_entry(line, allow_extra_columns)
    fields = line.strip.split("\t")
    return nil if fields.empty?
    return fields[0] if fields.length == 1
    return nil if fields.length > 2 && !allow_extra_columns

    "#{fields[0]}\t#{fields[1]}"
  end

  # Submits groups[part - 1] for every part from `start` on, pausing after
  # each batch, and returns the ids of the created queries.
  def ClassyFireAPI.submit_groups_in_batches(absolute_path, groups, start, type)
    basename = File.basename(absolute_path).split(".")[0]
    parts = ([start, 1].max..groups.length).to_a
    first_batch = parts.take_while { |part| part <= LAST_PART_OF_FIRST_BATCH }
    later_batches = parts.drop(first_batch.length).each_slice(FOLLOW_UP_BATCH_SIZE).to_a

    query_ids = []
    ([first_batch] + later_batches).each do |batch|
      batch.each do |part|
        id = submit_part(basename, part, groups[part - 1], type)
        query_ids << id unless id.nil?
      end
      pause_between_batches
    end
    query_ids
  end

  # Submits one group as a query and returns the id found in the response, or
  # nil (after printing the error) when submission or parsing fails.
  def ClassyFireAPI.submit_part(basename, part, group, type)
    title = "#{basename}_yannick_part_#{part}"
    puts "submitting #{title}"
    response = submit_query(title, group.join("\n"), type)
    JSON.parse(response)['id']
  rescue StandardError => e
    puts e.message
    puts e.backtrace.inspect
    nil
  end

  # Logs and performs the pause that follows every batch of submissions.
  def ClassyFireAPI.pause_between_batches
    puts "Going to sleep at #{Time.now - @start_time} for #{BATCH_PAUSE_SECONDS} s."
    sleep(BATCH_PAUSE_SECONDS)
    puts "Waking up at #{Time.now - @start_time}"
  end

  # Submits one query, fetches its result and returns the first entity as a
  # JSON string, or nil (after reporting on stderr) when any step fails.
  def ClassyFireAPI.first_entity_json(key, query_input)
    submitted = JSON.parse(submit_query(key, query_input))
    result = JSON.parse(get_query(submitted["id"], "json"))
    result["entities"][0].to_json
  rescue StandardError => e
    $stderr.puts "#{key} :: #{e.message}"
    nil
  end

  # Looks up the classification of one structure and returns it as a JSON
  # string tagged with `key`, or nil (after reporting on stderr) on failure.
  def ClassyFireAPI.classified_entity_json(key, structure)
    inchikey = structure_to_inchikey(structure)
    entity = JSON.parse(ClassyFireAPI.get_entity_classification(inchikey, "json"))
    entity['identifier'] = key
    parent = entity['direct_parent']
    puts "#{key} :: RETURN NIL" if parent.nil? || parent['name'].nil?
    entity.to_json
  rescue StandardError => e
    $stderr.puts "#{key} :: #{e.message}"
    nil
  end

  # Maps the first column of every non-blank line to the line's last column.
  def ClassyFireAPI.read_structures(path)
    structures = {}
    File.foreach(File.expand_path(path)) do |line|
      fields = line.strip.split("\t")
      next if fields.empty?

      structures[fields[0]] = fields[-1]
    end
    structures
  end

  # Runs molconvert on a structure and returns its stripped output. The
  # arguments are passed as an array so no shell ever interprets text that
  # came from the input file.
  def ClassyFireAPI.structure_to_inchikey(structure)
    IO.popen([MOLCONVERT_PATH, 'inchikey', '-s', structure.to_s], &:read).strip
  end

  # Writes the result document; joining the entities guarantees there is
  # never a dangling comma, whichever lookups failed.
  def ClassyFireAPI.write_entities_document(output, entities)
    document = '{"id": 1,"label":' + output.to_s.to_json +
               ',"classification_status":"Done","entities":[' + entities.join(",") + "]}"
    File.open(output, 'w') { |file| file.print document }
    nil
  end

  private_class_method :fetch_resource, :query_entry, :submit_groups_in_batches, :submit_part,
                       :pause_between_batches, :first_entity_json, :classified_entity_json,
                       :read_structures, :structure_to_inchikey, :write_entities_document
end