GEO

C

UDP Receive Socket

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <sys/types.h>
#include <sys/socket.h>

#define BUFF_SIZE 1024

int sock;
sock = socket(PF_INET, SOCK_DGRAM, 0); // SOCK_STREAM in TCP
if(-1 == sock) {printf("socket creation failed"); exit(1);}
struct sockaddr_in sv_ad;
memset(&sv_ad, 0, sizeof(sv_ad));
sv_ad.sin_family = AF_INET;
    IPv4 protocol
sv_ad.sin_port = htons(4000);
    port number = 4000
sv_ad.sin_addr.s_addr = htonl(INADDR_ANY);
    Addressing, using htonl(INADDR_ANY) instead of a fixed ip to change ip for each running system
struct sockaddr_in cl_ad;
int cl_ad_size;
    Set variable to receive address information of sender
cl_ad_size = sizeof(cl_ad);
recvfrom(sock, buff_receive, BUFF_SIZE, 0, (struct sockaddr*)&cl_ad, &cl_ad_size);
    Save received data to buff
    Save sender address information in cl_ad
    Transmit data to the sender using cl_ad

sendto(sock, buff_send, strlen(buff_send)+1,0,(struct sockaddr*)&cl_ad,sizeof(cl_ad));
    +1 to include NULL

ex)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <sys/types.h>
#include <sys/socket.h>

#define BUFF_SIZE 1024

/*
 * UDP server example.
 *
 * Binds a datagram socket to port 4000 on every local interface, then loops
 * forever: each received datagram is printed and sent back to its sender
 * with the payload repeated twice.
 *
 * Returns: never returns normally; exits with status 1 if the socket cannot
 * be created or bound.
 */
int main(void)
{
    int sock;
    socklen_t client_addr_size; /* recvfrom() takes a socklen_t *, not an int * */
    ssize_t received;
    int reply_len;

    struct sockaddr_in server_addr;
    struct sockaddr_in client_addr;

    char buff_rcv[BUFF_SIZE + 5];
    /* Must hold two copies of the longest received string plus the NUL. */
    char buff_snd[2 * BUFF_SIZE + 5];

    sock = socket(PF_INET, SOCK_DGRAM, 0);
    if (-1 == sock)
    {
        printf("Failed to create socket\n");
        exit(1);
    }

    memset(&server_addr, 0, sizeof(server_addr));
    server_addr.sin_family = AF_INET;
    server_addr.sin_port = htons(4000);
    server_addr.sin_addr.s_addr = htonl(INADDR_ANY);

    if (-1 == bind(sock, (struct sockaddr *)&server_addr, sizeof(server_addr)))
    {
        printf("bind() execution error\n");
        close(sock);
        exit(1);
    }

    while (1)
    {
        /* Value-result argument: must be reset before every recvfrom(). */
        client_addr_size = sizeof(client_addr);
        received = recvfrom(sock, buff_rcv, BUFF_SIZE, 0,
                            (struct sockaddr *)&client_addr, &client_addr_size);
        if (received < 0)
        {
            perror("recvfrom");
            continue;
        }

        /* The sender is not guaranteed to include a terminating NUL, so add
         * one before treating the payload as a C string. received is at most
         * BUFF_SIZE, which is inside the buffer. */
        buff_rcv[received] = '\0';
        printf("receive: %s\n", buff_rcv);

        reply_len = snprintf(buff_snd, sizeof(buff_snd), "%s%s", buff_rcv, buff_rcv);
        if (reply_len < 0 || (size_t)reply_len >= sizeof(buff_snd))
        {
            printf("reply does not fit in the send buffer\n");
            continue;
        }

        /* +1: send the terminating NUL as well. */
        if (-1 == sendto(sock, buff_snd, (size_t)reply_len + 1, 0,
                         (struct sockaddr *)&client_addr, client_addr_size))
        {
            perror("sendto");
        }
    }
}

UDP Send Socket

struct sockaddr_in sv_ad;
memset(&sv_ad, 0, sizeof(sv_ad));
sv_ad.sin_family = AF_INET;
sv_ad.sin_port = htons(4000);
sv_ad.sin_addr.s_addr = inet_addr("127.0.0.1");
sendto(sock, argv[1], strlen(argv[1])+1, 0, (struct sockaddr *)&sv_ad, sizeof(sv_ad));
    +1 to include NULL
    Socket is registered in the kernel through sendto
struct sockaddr_in sv_ad;
int sv_ad_size;
    Declare a variable to receive information from the sender through recvfrom()
sv_ad_size = sizeof(sv_ad);
recvfrom(sock,buff_receive, BUFF_SIZE,0,(struct sockaddr*)&sv_ad, &sv_ad_size);
If there is received data, the data is stored in the buff.

ex)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <sys/types.h>
#include <sys/socket.h>

#define BUFF_SIZE 1024

/*
 * UDP client example.
 *
 * Sends argv[1] (including its terminating NUL) as one datagram to
 * 127.0.0.1:4000, waits for a single reply, prints it, and closes the socket.
 *
 * argc/argv: argv[1] is the message to send; it is required.
 * Returns: 0 on success, 1 on a usage error or when a socket call fails.
 */
int main( int argc, char **argv)
{
   int sock;
   socklen_t server_addr_size;  /* recvfrom() takes a socklen_t *, not an int * */
   ssize_t received;

   struct sockaddr_in server_addr;

   char buff_rcv[BUFF_SIZE+5];

   /* argv[1] is dereferenced below, so make sure it exists. */
   if( argc < 2)
   {
      printf("usage: %s <message>\n", argc > 0 ? argv[0] : "udp_client");
      return 1;
   }

   sock = socket( PF_INET, SOCK_DGRAM, 0);
   if( -1 == sock)
   {
      printf("Failed to create socket\n");
      return 1;
   }

   memset( &server_addr, 0, sizeof( server_addr));
   server_addr.sin_family = AF_INET;
   server_addr.sin_port = htons(4000);
   server_addr.sin_addr.s_addr= inet_addr("127.0.0.1");

   /* +1: send the terminating NUL as well. */
   if( -1 == sendto( sock, argv[1], strlen( argv[1])+1, 0,
                     (struct sockaddr*)&server_addr, sizeof( server_addr)))
   {
      perror("sendto");
      close( sock);
      return 1;
   }

   /* recvfrom() overwrites server_addr with the address of whoever replied. */
   server_addr_size = sizeof( server_addr);
   received = recvfrom( sock, buff_rcv, BUFF_SIZE, 0,
                        (struct sockaddr*)&server_addr, &server_addr_size);
   if( received < 0)
   {
      perror("recvfrom");
      close( sock);
      return 1;
   }

   /* Do not rely on the peer having sent a NUL; received is at most
    * BUFF_SIZE, which is inside the buffer. */
   buff_rcv[received] = '\0';
   printf( "receive: %s\n", buff_rcv);
   close( sock);

   return 0;
}

Network

Unlike TCP, UDP uses recvfrom() and sendto() instead of read() and write().
With UDP, each datagram can be sent to or received from a different address. <--> Once a TCP socket is connected, it exchanges data only with the connected peer.
    In recvfrom, not only the received data, but also the sender information can be obtained.
    sendto() lets you specify the destination address for each send.

bind()
    Assign an address to a socket, assign a port number, and register it in the kernel
    When registered in the kernel, data is continuously received from the client.
    When allocating address and port to socket, use sockaddr_in structure

recvfrom()
    Data received
sendto()
    Data transfer

Use recvfrom() to receive the sender's address information.

main

int main(int argc, char*argv[])
argc
    Number of data passed to main() function
argv
    It is the actual data passed to the main() function, and is composed of an array of char type pointers.
    The first string is the execution path of the program.

RL

Methodology when knowing MDP

 1. A small problem
 2. Knowing the MDP = Knowing the reward function and the transition probability matrix. 
    (In other words, if you execute action A in state S, you know in advance how the next state will be determined and what will be the reward.)
    Planning: When you know the MDP, the process of using it to evaluate and improve a policy
        PREDICTION Problem: The problem of evaluating the value of each state given a policy.
        CONTROL Problem: Finding the Optimal Policy Function
*Table-based methodology: mainly because it is a small problem

Value evaluation: iterative policy evaluation

After initializing the values of the table, a methodology that gradually updates the values
    written in the table by repeatedly using the Bellman expectation equation
    You know the reward function and the probability of the transition. (Reward = -1 fixed, gamma = 1)

    1. Table initialization
    2. Update the value of one state
        Bellman Expectation Equation: An equation that expresses the value of the current state S in terms of the value of the next state S'
        For a terminal state, the value is '0'
    3. Apply the process of 2 for all states
    4. Repeat the process 2~3: eventually converge to the actual value

Finding the best policy: Policy iteration

A methodology that alternates between policy evaluation and policy improvement and repeats until policy convergence
    1. Initialize with a random policy
    2. Policy evaluation: Iterative policy evaluation, in this step, the value of each state is calculated for a fixed policy.
        It is called policy evaluation because it computes the value of each state
            when the policy is followed. Evaluation does not have to run to convergence: even a single evaluation sweep per iteration still leads to the optimal policy.
    3. Policy improvement: greedy policy creation
    4. Repeat 2~3
    5. The place of convergence becomes the optimum policy and optimum value.

Finding the best policy-Value iteration

Only looking at the optimal value created by the optimal policy
Table-based methodology
If you know all the MDP and know the optimal value, you can get the optimal policy right away.