# markov算法

1. 问题描述
2. awk 程序
3. c++ 程序
4. c 程序

#1. 问题描述

Show your flowcharts and conceal your tables and I will be mystified. Show your tables and your flowcharts will be obvious.

 前缀 后缀 show your flowcharts tables your flowcharts and will flowcharts and conceal flowcharts will be your tables and and will be mystified. obvious. be mystified. show be obvious. (end)

``````设置 w1 和 w2 为文本的前两个词

随机地选出 w3,它是文本中 w1 w2 的后缀中的一个
打印 w3
把 w1 和 w2 分别换成 w2 和 w3
重复循环
``````

#2.awk 程序

awk 中有关联数组，正好可以用来表示前缀和后缀的关系。程序如下：

``````# markov.awk: markov chain algorithm for 2-word prefixes
BEGIN {	MAXGEN = 10000; NONWORD = "\n"; w1 = w2 = NONWORD }

{	for (i = 1; i <= NF; i++) { 	# read all words
statetab[w1,w2,++nsuffix[w1,w2]] = \$i
w1 = w2
w2 = \$i
}
}

END {
statetab[w1,w2,++nsuffix[w1,w2]] = NONWORD	# add tail
w1 = w2 = NONWORD
for (i = 0; i < MAXGEN; i++) {	# generate
r = int(rand()*nsuffix[w1,w2]) + 1  # nsuffix >= 1
p = statetab[w1,w2,r]
if (p == NONWORD)
exit
print p
w1 = w2			# advance chain
w2 = p
}
}
``````

#3. C++ 程序

``````/* Copyright (C) 1999 Lucent Technologies */
/* Excerpted from 'The Practice of Programming' */
/* by Brian W. Kernighan and Rob Pike */

#include <time.h>
#include <iostream>
#include <string>
#include <deque>
#include <map>
#include <vector>

using namespace std;

const int  NPREF = 2;
const char NONWORD[] = "\n";	// cannot appear as real line: we remove newlines
const int  MAXGEN = 10000; // maximum words generated

typedef deque<string> Prefix;

map<Prefix, vector<string> > statetab; // prefix -> suffixes

void		build(Prefix&, istream&);
void		generate(int nwords);
void		add(Prefix&, const string&);

// markov main: markov-chain random text generation
int main(void)
{
int	nwords = MAXGEN;
Prefix prefix;	// current input prefix

srand(time(NULL));
for (int i = 0; i < NPREF; i++)
build(prefix, cin);
generate(nwords);
return 0;
}

// build: read input words, build state table
void build(Prefix& prefix, istream& in)
{
string buf;

while (in >> buf)
}

// add: add word to suffix deque, update prefix
void add(Prefix& prefix, const string& s)
{
if (prefix.size() == NPREF) {
statetab[prefix].push_back(s);
prefix.pop_front();
}
prefix.push_back(s);
}

// generate: produce output, one word per line
void generate(int nwords)
{
Prefix prefix;
int i;

for (i = 0; i < NPREF; i++)
for (i = 0; i < nwords; i++) {
vector<string>& suf = statetab[prefix];
const string& w = suf[rand() % suf.size()];
if (w == NONWORD)
break;
cout << w << "\n";
prefix.push_back(w);
}
}
``````

#4. c 程序

``````/* Copyright (C) 1999 Lucent Technologies */
/* Excerpted from 'The Practice of Programming' */
/* by Brian W. Kernighan and Rob Pike */

/*
* Markov chain random text generator.
*/

#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <time.h>
#include "eprintf.h"

enum {
NPREF	= 2,	/* number of prefix words */
NHASH	= 4093,	/* size of state hash table array */
MAXGEN	= 10000	/* maximum words generated */
};

typedef struct State State;
typedef struct Suffix Suffix;

struct State {	/* prefix + suffix list */
char	*pref[NPREF];	/* prefix words */
Suffix	*suf;			/* list of suffixes */
State	*next;			/* next in hash table */
};

struct Suffix {	/* list of suffixes */
char	*word;			/* suffix */
Suffix	*next;			/* next in list of suffixes */
};

State	*lookup(char *prefix[], int create);
void	build(char *prefix[], FILE*);
void	generate(int nwords);
void	add(char *prefix[], char *word);

State	*statetab[NHASH];	/* hash table of states */

char NONWORD[] = "\n";  /* cannot appear as real word */

/* markov main: markov-chain random text generation */
int main(void)
{
int i, nwords = MAXGEN;
char *prefix[NPREF];		/* current input prefix */

int c;
long seed;

setprogname("markov");
seed = time(NULL);

srand(seed);
for (i = 0; i < NPREF; i++)	/* set up initial prefix */
prefix[i] = NONWORD;
build(prefix, stdin);
generate(nwords);
return 0;
}

const int MULTIPLIER = 31;  /* for hash() */

/* hash: compute hash value for array of NPREF strings */
unsigned int hash(char *s[NPREF])
{
unsigned int h;
unsigned char *p;
int i;

h = 0;
for (i = 0; i < NPREF; i++)
for (p = (unsigned char *) s[i]; *p != '\0'; p++)
h = MULTIPLIER * h + *p;
return h % NHASH;
}

/* lookup: search for prefix; create if requested. */
/*  returns pointer if present or created; NULL if not. */
/*  creation doesn't strdup so strings mustn't change later. */
State* lookup(char *prefix[NPREF], int create)
{
int i, h;
State *sp;

h = hash(prefix);
for (sp = statetab[h]; sp != NULL; sp = sp->next) {
for (i = 0; i < NPREF; i++)
if (strcmp(prefix[i], sp->pref[i]) != 0)
break;
if (i == NPREF)		/* found it */
return sp;
}
if (create) {
sp = (State *) emalloc(sizeof(State));
for (i = 0; i < NPREF; i++)
sp->pref[i] = prefix[i];
sp->suf = NULL;
sp->next = statetab[h];
statetab[h] = sp;
}
return sp;
}

/* addsuffix: add to state. suffix must not change later */
void addsuffix(State *sp, char *suffix)
{
Suffix *suf;

suf = (Suffix *) emalloc(sizeof(Suffix));
suf->word = suffix;
suf->next = sp->suf;
sp->suf = suf;
}

/* add: add word to suffix list, update prefix */
void add(char *prefix[NPREF], char *suffix)
{
State *sp;

sp = lookup(prefix, 1);  /* create if not found */
/* move the words down the prefix */
memmove(prefix, prefix+1, (NPREF-1)*sizeof(prefix[0]));
prefix[NPREF-1] = suffix;
}

/* build: read input, build prefix table */
void build(char *prefix[NPREF], FILE *f)
{
char buf[100], fmt[10];

/* create a format string; %s could overflow buf */
sprintf(fmt, "%%%ds", sizeof(buf)-1);
while (fscanf(f, fmt, buf) != EOF)
}

/* generate: produce output, one word per line */
void generate(int nwords)
{
State *sp;
Suffix *suf;
char *prefix[NPREF], *w;
int i, nmatch;

for (i = 0; i < NPREF; i++)	/* reset initial prefix */
prefix[i] = NONWORD;

for (i = 0; i < nwords; i++) {
sp = lookup(prefix, 0);
if (sp == NULL)
eprintf("internal error: lookup failed");
nmatch = 0;
for (suf = sp->suf; suf != NULL; suf = suf->next)
if (rand() % ++nmatch == 0) /* prob = 1/nmatch */
w = suf->word;
if (nmatch == 0)
eprintf("internal error: no suffix %d %s", i, prefix[0]);
if (strcmp(w, NONWORD) == 0)
break;
printf("%s\n", w);
memmove(prefix, prefix+1, (NPREF-1)*sizeof(prefix[0]));
prefix[NPREF-1] = w;
}
}
``````
/
Published under (CC) BY-NC-SA in categories 程序设计  tagged with awk  markov  马尔可夫