A dead-simple web API for LANL's HIV sequence locator providing results in JSON. Positioning, region, and protein information is all available. Most of the data presented in the human-readable HTML page is extracted via this API. Get in touch if you need something that's missing!
POST .../within/hiv
Requires one or more values for the POST parameter
sequence
. Both protein and nucleotide
sequences are accepted, although the data returned varies
by type due to what LANL returns. See the
curl example which queries a protein
sequence and the same sequence as nucleotides.
Optionally accepts a (highly recommended) base
parameter set to nucleotide
or amino
acid
which forces all sequences to be interpreted as
the given base type. This is necessary when submitting
sequences with an ambiguous base type due to the overlap in
IUPAC alphabets. In such cases, LANL seems to assume
nucleotides, potentially producing incorrect results. For
example, the amino acid sequence MGGDMKDNW
is
also a valid nucleotide sequence, albeit one with many ambiguous
bases. Interpreting it as nucleotides, however, is
incorrect. It is not uncommon for short amino acid
peptides to exhibit this property.
On success (HTTP 200) the response body is a JSON array of objects, one per sequence. Both HTTP 4xx and 5xx status codes are used on failure with plain text bodies containing an error message.
HTTP Status | Reason |
---|---|
405 Method Not Allowed | The request did not use the HTTP POST method |
422 Unprocessable Entity | No sequence parameter was provided |
503 Service Unavailable | An unexpected condition occurred while parsing results from LANL |
500 Internal Server Error | An unexpected error occurred while processing your request |
The API tries not to return incorrect data from misparses of LANL's output. If it detects an anomaly in any of its parsing stages, it will abort the request and return an HTTP 503 Service Unavailable. If this happens to your request, or if you are receiving results you don't expect, please let us know!
Created by Thomas Sibley of the Mullins Lab at the University of Washington, Department of Microbiology.
Questions? Drop us a line.
# Locate two sequences in a single request: the first is a protein
# sequence, the second is the same region expressed as nucleotides.
curl --request POST \
    -d sequence=SLYNTVAVLYYVHQR \
    -d sequence=TCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGG \
    http://indra.mullins.microbiol.washington.edu/locate-sequence/within/hiv
[ { "query" : "sequence_1", "query_sequence" : "SLYNTVAVLYYVHQR", "base_type" : "amino acid", "reverse_complement" : "0", "alignment" : "\n Query SLYNTVAVLY YVHQR 15\n :::::::.:: :::: \n HXB2 SLYNTVATLY CVHQR\n\n ", "hxb2_sequence" : "SLYNTVATLYCVHQR", "similarity_to_hxb2" : "86.7", "start" : "77", "end" : "91", "genome_start" : "1018", "genome_end" : "1062", "polyprotein" : "Gag", "region_names" : [ "Gag", "p17" ], "regions" : [ { "cds" : "Gag", "aa_from_cds_start" : [ "229", "273" ], "aa_from_polyprotein_start" : null, "aa_from_protein_start" : [ "77", "91" ], "aa_from_query_start" : [ "1", "15" ], "na_from_hxb2_start" : [ "1018", "1062" ] }, { "cds" : "p17", "aa_from_cds_start" : [ "229", "273" ], "aa_from_polyprotein_start" : null, "aa_from_protein_start" : [ "77", "91" ], "aa_from_query_start" : [ "1", "15" ], "na_from_hxb2_start" : [ "1018", "1062" ] } ] }, { "query" : "sequence_2", "query_sequence" : "TCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGG", "base_type" : "nucleotide", "reverse_complement" : "0", "alignment" : "\n Query TCATTATATA ATACAGTAGC AACCCTCTAT TGTGTGCATC AAAGG 45\n :::::::::: :::::::::: :::::::::: :::::::::: ::::: \n HXB2 TCATTATATA ATACAGTAGC AACCCTCTAT TGTGTGCATC AAAGG 1062\n\n ", "hxb2_sequence" : "TCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGG", "similarity_to_hxb2" : "100.0", "start" : "229", "end" : "273", "genome_start" : "1018", "genome_end" : "1062", "polyprotein" : "Gag", "region_names" : [ "Gag", "p17" ], "regions" : [ { "cds" : "Gag", "aa_from_protein_start" : [ "77", "91" ], "na_from_cds_start" : [ "229", "273" ], "na_from_hxb2_start" : [ "1018", "1062" ], "na_from_query_start" : [ "1", "45" ], "protein_translation" : "SLYNTVATLYCVHQR" }, { "cds" : "p17", "aa_from_protein_start" : [ "77", "91" ], "na_from_cds_start" : [ "229", "273" ], "na_from_hxb2_start" : [ "1018", "1062" ], "na_from_query_start" : [ "1", "45" ], "protein_translation" : "SLYNTVATLYCVHQR" } ] } ]
# Force the sequence to be interpreted as amino acids. MGGDMKDNW is also a
# valid (highly ambiguous) nucleotide string, so the base type is stated
# explicitly rather than left for the service to guess.
#
# --data-urlencode is used for "base" because its value contains a space;
# plain --data would send the space unencoded in the form body.
curl -X POST http://indra.mullins.microbiol.washington.edu/locate-sequence/within/hiv \
    --data-urlencode base='amino acid' \
    --data sequence=MGGDMKDNW
Bio::WebService::LANL::SequenceLocator
#!/usr/bin/env perl
#
# Query the sequence locator through the Perl client library.
#
# Install the library first:
#   cpan -i Bio::WebService::LANL::SequenceLocator
#
use strict;
use warnings;

use Bio::WebService::LANL::SequenceLocator;

# The agent string identifies who is making the request.
my $client = Bio::WebService::LANL::SequenceLocator->new(
    agent_string => 'Your Organization - you@example.com',
);

# A single nucleotide query, split across two literals for readability.
my $query = "agcaatcagatggtcagccaaaattgccctatagtgcagaacatcc"
          . "aggggcaagtggtacatcaggccatatcacctagaactttaaatgca";

# find() takes an array ref of sequences.
my @located = $client->find([ $query ]);
#!/usr/bin/env perl
#
# Query the sequence locator web API directly with LWP and decode the
# JSON response.
#
use strict;
use warnings;

use JSON qw< decode_json >;
use LWP::UserAgent;

my $agent = LWP::UserAgent->new( agent => 'you@example.com' );

# POST a single nucleotide sequence as form data.
my $response = $agent->post(
    "http://indra.mullins.microbiol.washington.edu/locate-sequence/within/hiv" => [
        sequence => "TCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGG",
    ],
);

# Failures come back as 4xx/5xx with a plain-text error message body.
unless ($response->is_success) {
    die "Request failed: ", $response->status_line, "\n",
        $response->decoded_content;
}

# decode_json() expects UTF-8 encoded bytes, so ask for the body with any
# Content-Encoding undone but WITHOUT charset decoding; this keeps the
# input as bytes regardless of the Content-Type the server sends.
my $results = decode_json( $response->decoded_content( charset => 'none' ) );

# $results is now an array ref, like the JSON above
print $results->[0]{polyprotein}, "\n";
#!/usr/bin/env python2 from urllib2 import Request, urlopen, URLError from urllib import urlencode import json request = Request('http://indra.microbiol.washington.edu/locate-sequence/within/hiv') data = urlencode({ 'sequence': [ 'SLYNTVAVLYYVHQR', 'TCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGG' ] }, True); try: response = urlopen(request, data) text = response.read() results = json.loads(text) except URLError, e: print 'Request failed: ', e except ValueError, e: print 'Decoding JSON failed: ', e finally: if results == None: exit(1) print results
# Query the sequence locator web API from R and print the genome start
# coordinate reported for each submitted sequence.
library("RCurl")
library("rjson")

# POST both sequences as repeated "sequence" form fields and parse the
# JSON reply; each handler prints a message when its condition is raised.
results <- tryCatch(
  fromJSON(
    postForm(
      "http://indra.mullins.microbiol.washington.edu/locate-sequence/within/hiv",
      sequence = "SLYNTVAVLYYVHQR",
      sequence = "TCATTATATAATACAGTAGCAACCCTCTATTGTGTGCATCAAAGG"
    )
  ),
  HTTPError = function(err) cat("Error making request: ", err$message),
  error     = function(err) cat("Error decoding JSON")
)

print(lapply(results, function(hit) hit$genome_start))