Wednesday, 25 August 2010

Print IP addresses, email addresses or URLs found in text, in C

Print IP addresses, email addresses or URLs found in text

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <locale.h>
#include <sys/types.h>
#include <regex.h>
#include <unistd.h>

/* Program name, used as a prefix in the version, error and help output. */
#define PACKAGE "miep"
/* Version string printed by -v and in the help banner. */
#define VERSION "1.0.0"

/*
 * POSIX extended regular expressions used by the three extractors.
 *
 * IPEXPR: four dot-separated groups of one to three digits.  The pattern is
 *         not anchored and does not check that each group is <= 255.
 * EMEXPR: something, '@', something, '.', then a purely alphabetic final
 *         label running to the end of the string.  The final label may be
 *         2 to 63 letters long; the previous limit of 3 letters rejected
 *         valid addresses under longer top-level domains such as ".info".
 * UREXPR: an "href=" or "src=" attribute prefix.
 */
#define IPEXPR "([0-9]{1,3})\\.([0-9]{1,3})\\.([0-9]{1,3})\\.([0-9]{1,3})"
#define EMEXPR ".*@.*\\.([a-zA-Z]{2,63})$"
#define UREXPR "(href|src)="

/* Read fp line by line and print every token that matches IPEXPR. */
void print_ipadd(FILE *fp);
/* Read fp and print every candidate string that matches EMEXPR. */
void print_email(FILE *fp);
/* Read fp line by line and print every token that matches UREXPR. */
void print_url(FILE *fp);
/* Print the usage text, then terminate the process with status exval. */
void print_help(int exval);

/*
 * Run the selected extractor on one stream.
 *
 * Only one extractor runs per stream: e-mail takes precedence over IP
 * addresses, which take precedence over URLs.
 */
static void miep_scan_stream(FILE *fp, int em_set, int ip_set) {
    if (em_set == 1)
        print_email(fp);
    else if (ip_set == 1)
        print_ipadd(fp);
    else
        print_url(fp);
}

/*
 * Entry point: parse the options, then scan either standard input (no file
 * operands) or each named file in turn.
 *
 * Options: -h help, -v version, -i IP addresses, -e e-mail addresses,
 * -u URLs.  At least one of -i/-e/-u is required; otherwise the help text
 * is printed and the program exits with status 1.
 *
 * Files that cannot be opened are reported with perror() and skipped.
 * Returns 0 after all operands have been processed.
 */
int main(int argc, char *argv[]) {
    FILE *fp = NULL;
    int opt = 0;
    int em_set = 0;
    int ip_set = 0;
    int ur_set = 0;

    setlocale(LC_ALL, "");

    while ((opt = getopt(argc, argv, "hvieu")) != -1) {
        switch (opt) {
        case 'h':
            print_help(0);
            break;
        case 'v':
            fprintf(stdout, "%s %s\n", PACKAGE, VERSION);
            exit(0);
            break;
        case 'i':
            ip_set = 1;
            break;
        case 'e':
            em_set = 1;
            break;
        case 'u':
            ur_set = 1;
            break;
        case '?':
        default:
            fprintf(stderr, "%s: Error: No such option `%c'\n\n", PACKAGE, optopt);
            print_help(1);
            break;
        } /* switch */
    } /* while */

    /* Nothing to do unless at least one extraction mode was requested. */
    if (ip_set == 0 && em_set == 0 && ur_set == 0)
        print_help(1);

    /* No file operands: read from standard input. */
    if (optind == argc) {
        miep_scan_stream(stdin, em_set, ip_set);
        return 0;
    }

    /*
     * Open each operand as its own stream.  Re-targeting stdin with
     * freopen() would leave the stream closed after a failed open, making
     * every later use of it undefined.
     */
    for (; optind < argc; optind++) {
        fp = fopen(argv[optind], "r");
        if (fp == NULL) {
            perror(argv[optind]);
            continue;
        }

        miep_scan_stream(fp, em_set, ip_set);
        fclose(fp);
    } /* for */

    return 0;
}

void print_ipadd(FILE *fp) {
char line[1024];
char *address = NULL;
char delim[] = ",:;`/\"+-_(){}[]<>*&^%$#@!?~/|\\= \t\r\n";
int retval = 0;
regex_t re;

if(regcomp(&re, IPEXPR, REG_EXTENDED) != 0)
return;

while((fgets(line, 1024, fp)) != NULL) {
if(strchr(line, '.') == NULL)
continue;

address = strtok(line, delim);
while(address != NULL) {
if(strlen(address) <= 15)
if((retval = regexec(&re, address, 0, NULL, 0)) == 0)
printf("%s\n", address);

address = strtok(NULL, delim);
} /* while */
} /* while */
} /* print_ipadd */

void print_email(FILE *fp) {
char address[256];
char line[1024];
char *ptr1 = NULL;
char *ptr2 = NULL;
int retval = 0;
regex_t re;

if(regcomp(&re, EMEXPR, REG_EXTENDED) != 0)
return;

while((fgets(line, 1024, fp)) != NULL) {
if(strchr(line, '@') == NULL && strchr(line, '.') == NULL)
continue;

for(ptr1 = line, ptr2 = address; *ptr1; ptr1++) {
if(isalpha(*ptr1) || isdigit(*ptr1) || strchr(".-_@", *ptr1) != NULL)
*ptr2++ = *ptr1;
else {
*ptr2 = '\0';

if(strlen(address) >= 6 && strchr(address, '@') != NULL &&
strchr(address, '.') != NULL)
if((retval = regexec(&re, address, 0, NULL, 0)) == 0)
printf("%s\n", address);

ptr2 = address;
} /* else */
} /* for */
} /* while */
} /* print_email */

void print_url(FILE *fp) {
char line[1024];
char delim[] = "<> \t\n";
char *url = NULL;
int retval = 0;
regex_t re;

if(regcomp(&re, UREXPR, REG_ICASE|REG_EXTENDED) != 0)
return;

while((fgets(line, 1024, fp)) != NULL) {
url = strtok(line, delim);
while(url != NULL) {
if((retval = regexec(&re, url, 0, NULL, 0)) == 0)
printf("%s\n", url);

url = strtok(NULL, delim);
} /* while */
} /* while */
} /* print_url */

/*
 * print_help - print the usage text and terminate the program.
 *
 * exval: process exit status.  When it is 0 (help was requested) the text
 *        goes to stdout; for any other value (usage error) it goes to
 *        stderr so it does not mix with extracted output.
 *
 * This function does not return.
 */
void print_help(int exval) {
    FILE *out = (exval == 0) ? stdout : stderr;

    fprintf(out, "%s,%s print e-mail, urls or ip addresses found in text data\n", PACKAGE, VERSION);
    /* FILE operands are optional: without them standard input is read. */
    fprintf(out, "%s [-h] [-v] [-i] [-e] [-u] [FILE...]\n\n", PACKAGE);

    fprintf(out, " -h print this help and exit\n");
    fprintf(out, " -v print version and exit\n\n");

    fprintf(out, " -i print ip addresses\n");
    fprintf(out, " -e print e-mail addresses\n");
    fprintf(out, " -u print urls\n\n");

    exit(exval);
}

Read more: http://cmagical.blogspot.com/2010_02_14_archive.html#ixzz0xgYHzXUH

No comments:

Post a Comment