<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
  <channel>
    <title>기술 블로그 안하세요?</title>
    <link>https://quasar529.tistory.com/</link>
    <description>이제 진짜 하려고요...</description>
    <language>ko</language>
    <pubDate>Wed, 24 Jun 2026 11:33:46 +0900</pubDate>
    <generator>TISTORY</generator>
    <ttl>100</ttl>
    <managingEditor>quasar529</managingEditor>
    <image>
      <title>기술 블로그 안하세요?</title>
      <url>https://tistory1.daumcdn.net/tistory/6964522/attach/af77685aaf3941a2acda0fa55ff9fb49</url>
      <link>https://quasar529.tistory.com</link>
    </image>
    <item>
      <title>[Paper Review] CPR: Retrieval Augmented Generation for Copyright Protection</title>
      <link>https://quasar529.tistory.com/7</link>
      <description>&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;지난&amp;nbsp;포스팅의&amp;nbsp;주제였던&amp;nbsp;Copyright&amp;nbsp;Protection이&amp;nbsp;적용된&amp;nbsp;RAG에&amp;nbsp;대한&amp;nbsp;논문입니다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;figure id=&quot;og_1717334936754&quot; contenteditable=&quot;false&quot; data-ke-type=&quot;opengraph&quot; data-ke-align=&quot;alignCenter&quot; data-og-type=&quot;website&quot; data-og-title=&quot;CPR: Retrieval Augmented Generation for Copyright Protection&quot; data-og-description=&quot;Retrieval Augmented Generation (RAG) is emerging as a flexible and robust technique to adapt models to private users data without training, to handle credit attribution, and to allow efficient machine unlearning at scale. However, RAG techniques for image &quot; data-og-host=&quot;arxiv.org&quot; data-og-source-url=&quot;https://arxiv.org/abs/2403.18920&quot; data-og-url=&quot;https://arxiv.org/abs/2403.18920v1&quot; data-og-image=&quot;https://scrap.kakaocdn.net/dn/diUjO1/hyWg0nYWFI/IBOFMc7dCoEmMM0MhpORmK/img.png?width=1200&amp;amp;height=700&amp;amp;face=0_0_1200_700,https://scrap.kakaocdn.net/dn/v8Giq/hyWdnkOz6c/Ya7kMlFXhZz2pnW6YCeqKk/img.png?width=1000&amp;amp;height=1000&amp;amp;face=0_0_1000_1000&quot;&gt;&lt;a href=&quot;https://arxiv.org/abs/2403.18920&quot; target=&quot;_blank&quot; rel=&quot;noopener&quot; data-source-url=&quot;https://arxiv.org/abs/2403.18920&quot;&gt;
&lt;div class=&quot;og-image&quot; style=&quot;background-image: url('https://scrap.kakaocdn.net/dn/diUjO1/hyWg0nYWFI/IBOFMc7dCoEmMM0MhpORmK/img.png?width=1200&amp;amp;height=700&amp;amp;face=0_0_1200_700,https://scrap.kakaocdn.net/dn/v8Giq/hyWdnkOz6c/Ya7kMlFXhZz2pnW6YCeqKk/img.png?width=1000&amp;amp;height=1000&amp;amp;face=0_0_1000_1000');&quot;&gt;&amp;nbsp;&lt;/div&gt;
&lt;div class=&quot;og-text&quot;&gt;
&lt;p class=&quot;og-title&quot; data-ke-size=&quot;size16&quot;&gt;CPR: Retrieval Augmented Generation for Copyright Protection&lt;/p&gt;
&lt;p class=&quot;og-desc&quot; data-ke-size=&quot;size16&quot;&gt;Retrieval Augmented Generation (RAG) is emerging as a flexible and robust technique to adapt models to private users data without training, to handle credit attribution, and to allow efficient machine unlearning at scale. However, RAG techniques for image&lt;/p&gt;
&lt;p class=&quot;og-host&quot; data-ke-size=&quot;size16&quot;&gt;arxiv.org&lt;/p&gt;
&lt;/div&gt;
&lt;/a&gt;&lt;/figure&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;RAG는 &lt;span data-token-index=&quot;1&quot;&gt;private users data&lt;/span&gt;를 학습 없이 사용가능하게 하지만 모델이 retrieved samples을 그대로 복사할 위험이 있습니다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;그래서 제시한 것이 본 논문의 &lt;b&gt;&lt;span data-token-index=&quot;0&quot;&gt;Copy-Protected generation with Retrieval (CPR)입니다.&lt;/span&gt;&lt;/b&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;Inference 할 때 public private 분포의 diffusion score 섞어서 sampling 하고&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;이는 NAF를 만족합니다. (Near Access-Freeness)&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;&lt;span data-token-index=&quot;0&quot;&gt;Mixed-Privacy RAG&lt;/span&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;$D_{private}$이 parameter update에 사용되지 않아 immediate application to privacy 가능,&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;하지만 inference 때 retrieved된 sample이 정보를 유출할 수 있습니다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;$D$ (Safe dataset)로 학습한 public diffusion model = $s_\theta (x_t, t, c)$&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;c는 clip encoder의 결과 ($c = CLIP(&amp;lt;prompt&amp;gt;)$)&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;$D_{private}$ = protection 필요한 데이터셋, 본 논문에선 data for retrieval&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;$D_{private}$ 의 부분집합인 $D_{retr} = \{(x_i,\phi(c_i, c_{test})) \}^m_{i=1}$ 를 generation 향상 위해 사용&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;$score = || c_{test} - c_i|| + ||c_{test} - CLIP(x_i)||$&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;이를 기반으로 가장 가까운 m개 sample 뽑는다.&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;$\phi(c_i, c_{test}) = c_i + c_{test}$&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;Mixture-of-Distribution&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;public과 private 분포 섞습니다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;$p(x|c) = w_0 p_D (x|c) + w_1 p_{D_{retr}}(x|c)$&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;$w_0 = &amp;lambda;$&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;$w_1 = 1&amp;minus;&amp;lambda;$&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;&lt;b&gt;Mixture-of-Score&lt;/b&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;mixture distribution에서 샘플링하기 위해 score function 계산하고&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;섞은 분포에서 샘플링을 진행합니다.&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;2000&quot; data-origin-height=&quot;216&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/MSC7s/btsHLOC8qwN/KkOOXX3BXZ8fhMkZg4HiN1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/MSC7s/btsHLOC8qwN/KkOOXX3BXZ8fhMkZg4HiN1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/MSC7s/btsHLOC8qwN/KkOOXX3BXZ8fhMkZg4HiN1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FMSC7s%2FbtsHLOC8qwN%2FKkOOXX3BXZ8fhMkZg4HiN1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;2000&quot; height=&quot;216&quot; data-origin-width=&quot;2000&quot; data-origin-height=&quot;216&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;2000&quot; data-origin-height=&quot;218&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/PkWgV/btsHMRsjprj/ymgDIK3bk5k8Wafeqrxc5k/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/PkWgV/btsHMRsjprj/ymgDIK3bk5k8Wafeqrxc5k/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/PkWgV/btsHMRsjprj/ymgDIK3bk5k8Wafeqrxc5k/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FPkWgV%2FbtsHMRsjprj%2FymgDIK3bk5k8Wafeqrxc5k%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;2000&quot; height=&quot;218&quot; data-origin-width=&quot;2000&quot; data-origin-height=&quot;218&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;b&gt;Proposition 1.&lt;/b&gt;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;2000&quot; data-origin-height=&quot;171&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/dbd2vB/btsHKlh4MmU/kds9KSZDvQgWxDnxdscTP1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/dbd2vB/btsHKlh4MmU/kds9KSZDvQgWxDnxdscTP1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/dbd2vB/btsHKlh4MmU/kds9KSZDvQgWxDnxdscTP1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fdbd2vB%2FbtsHKlh4MmU%2Fkds9KSZDvQgWxDnxdscTP1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;2000&quot; height=&quot;171&quot; data-origin-width=&quot;2000&quot; data-origin-height=&quot;171&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;$\hat{w}$ : fixed hyper-parameters&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;$\nabla_{x_t} \log p_D(x_t|c)$ &amp;rarr; $s_\theta$ (diffusion model)로 근사 가능&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;근데 $D_{retr}$ 로 학습한 model은 없어 &amp;rarr; $s_{\theta_1}$&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock widthContent&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;2000&quot; data-origin-height=&quot;413&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/0BBup/btsHMcReu0z/MLva1Xr3ryMkZySwjf98KK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/0BBup/btsHMcReu0z/MLva1Xr3ryMkZySwjf98KK/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/0BBup/btsHMcReu0z/MLva1Xr3ryMkZySwjf98KK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2F0BBup%2FbtsHMcReu0z%2FMLva1Xr3ryMkZySwjf98KK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;2000&quot; height=&quot;413&quot; data-origin-width=&quot;2000&quot; data-origin-height=&quot;413&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;근데 &lt;span&gt;$s_{\theta_1}$&lt;/span&gt; 계산량 너무 많아 CLIP 으로 대체합니다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;b&gt;&lt;span data-token-index=&quot;0&quot;&gt;Proposition 2.&lt;/span&gt;&lt;/b&gt;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;2000&quot; data-origin-height=&quot;413&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/b7fSkA/btsHKDJz8mo/Vd9MwVnRdp9vU2OSV9f4V0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/b7fSkA/btsHKDJz8mo/Vd9MwVnRdp9vU2OSV9f4V0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/b7fSkA/btsHKDJz8mo/Vd9MwVnRdp9vU2OSV9f4V0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fb7fSkA%2FbtsHKDJz8mo%2FVd9MwVnRdp9vU2OSV9f4V0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;2000&quot; height=&quot;413&quot; data-origin-width=&quot;2000&quot; data-origin-height=&quot;413&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;modifying the user prompt $c_{test}$ using the CLIP embeddings of the retrieved samples&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;∵ fine-tuning 안하려고&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;optimal diffusion model trained on retrieved data를 CLIP embedding으로 근사시킬 수 있다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;b&gt;Retrieval-Mixture-Score&lt;/b&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;expression for the score function of retrieval- augmented mixture of distributions&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;2000&quot; data-origin-height=&quot;431&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/cYe1gy/btsHLXfHkNN/RJqrZwmcKhumki0Nr2fEr0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/cYe1gy/btsHLXfHkNN/RJqrZwmcKhumki0Nr2fEr0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/cYe1gy/btsHLXfHkNN/RJqrZwmcKhumki0Nr2fEr0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FcYe1gy%2FbtsHLXfHkNN%2FRJqrZwmcKhumki0Nr2fEr0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;2000&quot; height=&quot;431&quot; data-origin-width=&quot;2000&quot; data-origin-height=&quot;431&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;&lt;span data-token-index=&quot;0&quot;&gt;Copy-Protected Generation&lt;/span&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;h4 data-ke-size=&quot;size20&quot;&gt;&lt;span data-token-index=&quot;0&quot;&gt;CPR-KL&lt;/span&gt;&lt;/h4&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock widthContent&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;2000&quot; data-origin-height=&quot;1637&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bQdUP4/btsHKpSboh8/e9NJ9lRTDJyuU5NtMUiGEk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bQdUP4/btsHKpSboh8/e9NJ9lRTDJyuU5NtMUiGEk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bQdUP4/btsHKpSboh8/e9NJ9lRTDJyuU5NtMUiGEk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbQdUP4%2FbtsHKpSboh8%2Fe9NJ9lRTDJyuU5NtMUiGEk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;2000&quot; height=&quot;1637&quot; data-origin-width=&quot;2000&quot; data-origin-height=&quot;1637&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;q1과 q2에 access 못해 &lt;br /&gt;➡️ 대신 $&amp;nabla;_{x_t} \log \int q_t(x_t|x_0)q^{(1)}(x_0|c)dx_0$ , $&amp;nabla;_{x_t} \log \int q_t(x_t|x_0)q^{(2)}(x_0|c)dx_0$&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;b&gt;average the two scores at every step during backward&lt;/b&gt; diffusion using Langevin Dynamics&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;Algorithm 1 &amp;rarr; k-NAF 보장한다&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;optimal score 몰라 &amp;rarr; DNN으로 근사&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;inference time의 computation cost가 2배&lt;/li&gt;
&lt;/ul&gt;
&lt;blockquote data-ke-style=&quot;style3&quot;&gt;...smoothly interpolates between N(0,I) at t = T...&lt;br /&gt;...Langevin dynamics converge exponentially fast to the distribution estimated by the gradients...&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;Experiments&lt;/h3&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock widthContent&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;2000&quot; data-origin-height=&quot;675&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/d13JRV/btsHKFmQ2Nz/VHexYvJE6uc08je4GBH2mK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/d13JRV/btsHKFmQ2Nz/VHexYvJE6uc08je4GBH2mK/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/d13JRV/btsHKFmQ2Nz/VHexYvJE6uc08je4GBH2mK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fd13JRV%2FbtsHKFmQ2Nz%2FVHexYvJE6uc08je4GBH2mK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;2000&quot; height=&quot;675&quot; data-origin-width=&quot;2000&quot; data-origin-height=&quot;675&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;w1이 커지면 k는 작아져&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;w1커지면 k작아짐 &amp;rarr; 더 safe 해짐&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;즉 Textual prompts (from clip)과의 유사성은 높아지고&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;Retrieved images와의 유사성은 감소합니다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style5&quot; /&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;현재 Copyright Protection을 적용한 논문들은&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;더 추가되거나 발전시키는 부분은 없고&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;저자의 방법이 기존에 제시된 NAF를 만족한다고 주장하는 것에서 더 나아가지 못하는 모습입니다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;아마 아직 연구 초기 단계라 그런 것 같습니다.&lt;/p&gt;</description>
      <category>Paper Review</category>
      <author>quasar529</author>
      <guid isPermaLink="true">https://quasar529.tistory.com/7</guid>
      <comments>https://quasar529.tistory.com/7#entry7comment</comments>
      <pubDate>Sun, 2 Jun 2024 22:40:03 +0900</pubDate>
    </item>
    <item>
      <title>[Paper Review] On Provable Copyright Protection for Generative Models</title>
      <link>https://quasar529.tistory.com/6</link>
      <description>&lt;p data-ke-size=&quot;size16&quot;&gt;Privacy를 지키기 위해 DP라 불리는 Differential Privacy가 주로 사용됩니다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;하지만 이 방법은 성능의 한계가 뚜렷해 적극적으로 사용하기 힘듭니다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;이의 대안으로 본 논문은 Copyright 개념을 제시하며&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;Privacy보다는 덜 엄밀하지만 충분히 정보 보호를 할 수 있는 방법에 대해 이야기합니다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;2000&quot; data-origin-height=&quot;976&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/KGTn3/btsHCUKfo4v/kVZyz0cLt3rpXkgkU3a9U1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/KGTn3/btsHCUKfo4v/kVZyz0cLt3rpXkgkU3a9U1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/KGTn3/btsHCUKfo4v/kVZyz0cLt3rpXkgkU3a9U1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FKGTn3%2FbtsHCUKfo4v%2FkVZyz0cLt3rpXkgkU3a9U1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;2000&quot; height=&quot;976&quot; data-origin-width=&quot;2000&quot; data-origin-height=&quot;976&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;맨 왼쪽 = p&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;가운데 두개 = q1,q2 (q1은 q2이미지 없다 vice versa)&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;마지막 = p,q1,q2 이용 &amp;rarr; p_k (둘다 없음)&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;Dataset : CIFAR-10 (along with horizontal flips) augmented with multiple copies of two images taken from the CIFAR-10 test set&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;2장을 test set에서 가져오고, 이를 copyrighted works로 가정
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;전체의 2%&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;Model p&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;full dataset으로 학습&lt;/li&gt;
&lt;li&gt;two copyrighted works를 생성&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;Algorithm&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;copyrighted images가 나눠지도록 두개의 데이터셋으로 분리&lt;/li&gt;
&lt;li&gt;CP-k using a threshold of k = 500 : $p_k$&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;$\max_{i&amp;isin;\{1,2\}} \log(p(y)/q_i(y))$&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;분포 bimodal&lt;/li&gt;
&lt;li&gt;first mode는 그냥이미지&lt;/li&gt;
&lt;li&gt;second mode는 모두 copyrighted images&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;2000&quot; data-origin-height=&quot;733&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/KmROy/btsHDcRztb6/Us53ttHjpMEYKOpdKugh50/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/KmROy/btsHDcRztb6/Us53ttHjpMEYKOpdKugh50/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/KmROy/btsHDcRztb6/Us53ttHjpMEYKOpdKugh50/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FKmROy%2FbtsHDcRztb6%2FUs53ttHjpMEYKOpdKugh50%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;2000&quot; height=&quot;733&quot; data-origin-width=&quot;2000&quot; data-origin-height=&quot;733&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;결과적으로 $p_k$를 파란색, 초록색 선의 분포를 가지게 만드는 것이 목표입니다.&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;DP와 유사해보이지만, 사실 매우 다릅니다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;Copyright가 훨씬 느슨한 기준을 가지고 있어 달성하기 수월합니다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;본 논문도 이를 명시해서 설명합니다.&lt;/p&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;&lt;b&gt;Comparison with Differentially Private Prediction&lt;/b&gt;&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;b&gt;Privacy&lt;/b&gt;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;Privacy is focused on an individual and the attributes of that individual&lt;/li&gt;
&lt;li&gt;if any particular generative output leaks even a few bits about a training sample, this could still be a significant privacy violation&lt;/li&gt;
&lt;li&gt;privacy requires that the output of a mechanism does not reveal whether or not an individual&amp;rsquo;s data was in the database&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;b&gt;Copyright&lt;/b&gt;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;copyright protection is only for a specific piece of work&lt;/li&gt;
&lt;li&gt;a few bits of leakage are unlikely to constitute a copyright violation since copyright requires a minimum amount of information content&lt;/li&gt;
&lt;li&gt;we only need to ensure that no particular output is substantially similar to a copyrighted work&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;</description>
      <category>Paper Review</category>
      <author>quasar529</author>
      <guid isPermaLink="true">https://quasar529.tistory.com/6</guid>
      <comments>https://quasar529.tistory.com/6#entry6comment</comments>
      <pubDate>Sun, 26 May 2024 23:28:38 +0900</pubDate>
    </item>
    <item>
      <title>[Paper Review] FLORA: Low-Rank Adapters Are Secretly Gradient Compressors</title>
      <link>https://quasar529.tistory.com/5</link>
      <description>&lt;p data-ke-size=&quot;size16&quot;&gt;이번 포스팅은 저에게 큰 절망감을 안겨줬던 논문에 대해 이야기하려 합니다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;저희가 해오던 연구를 거의 반파시킨 &lt;b&gt;FLORA&lt;/b&gt;라는 논문입니다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;한도 끝도 없이 쓸 수 있지만 마음 아프니 짧게 포스팅하겠습니다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock widthContent&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;스크린샷 2024-04-14 오후 7.43.27.png&quot; data-origin-width=&quot;1978&quot; data-origin-height=&quot;686&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bzTsER/btsGDKgWUCd/6rXxifVN2S8NqofYV4adJK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bzTsER/btsGDKgWUCd/6rXxifVN2S8NqofYV4adJK/img.png&quot; data-alt=&quot;하오 용창... 잊지 않겠습니다&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bzTsER/btsGDKgWUCd/6rXxifVN2S8NqofYV4adJK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbzTsER%2FbtsGDKgWUCd%2F6rXxifVN2S8NqofYV4adJK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1978&quot; height=&quot;686&quot; data-filename=&quot;스크린샷 2024-04-14 오후 7.43.27.png&quot; data-origin-width=&quot;1978&quot; data-origin-height=&quot;686&quot;/&gt;&lt;/span&gt;&lt;figcaption&gt;하오 용창... 잊지 않겠습니다&lt;/figcaption&gt;
&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;본 논문은 LoRA의 작동원리를 해석하고 이를 Memory Efficient하게 적용하는 방법을 제시하는 논문입니다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;결론부터 말하면,&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style3&quot;&gt;&lt;span style=&quot;color: #f89009;&quot;&gt;LoRA는 사실상 A를 활용해 W의 Gradient를 Down-Projection, Up-Projection을 반복하는 과정이다.&lt;/span&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;라고 볼 수 있겠습니다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;b&gt;즉 Random Projection을 통해 Gradient를 Compress하고 다시 Decompress 합니다.&amp;nbsp;&lt;/b&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;여러 가정이 뒷받침되어야 하지만, LoRA를 사용한 학습 원리를 명료하게 설명한다는 것은 틀림없습니다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;구체적인 증명과정을 살펴보겠습니다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style5&quot; /&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;LoRA를 사용하면 다음 Matrix가 존재합니다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;$W$ : Pre-trained Weight Matrix (n x m)&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;$B$ : LoRA B initialized by zero (n x r)&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;$A$: LoRA A&lt;span style=&quot;color: #333333; text-align: start;&quot;&gt;&lt;span&gt;&amp;nbsp;&lt;/span&gt;&lt;/span&gt;initialized by &lt;span style=&quot;font-family: -apple-system, BlinkMacSystemFont, 'Helvetica Neue', 'Apple SD Gothic Neo', Arial, sans-serif; letter-spacing: 0px;&quot;&gt;normal distribution &lt;/span&gt;(r x m)&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;이 때, Forward Pass는 다음과 같습니다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;$y&amp;nbsp;=&amp;nbsp;(W+BA)x&amp;nbsp;=&amp;nbsp;Wx+BAx$&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;($BA$는 $W$를 변화시키지 않습니다.)&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;Back-Propagation에서 W의 Gradient는 다음과 같습니다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;$&amp;nabla;_WL_t&amp;nbsp;=&amp;nbsp;\frac{\delta&amp;nbsp;L}{\delta&amp;nbsp;y}x^T$&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;이 때, $B$, $A$의 Gradient는 다음과 같습니다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;$\frac{\delta L}{\delta A} = B^T \frac{\delta L}{\delta y}x^T = B^T (\nabla_WL)$&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;$\frac{\delta&amp;nbsp;L}{\delta&amp;nbsp;B}&amp;nbsp;=&amp;nbsp;\frac{\delta&amp;nbsp;L}{\delta&amp;nbsp;y}x^TA^T&amp;nbsp;&amp;nbsp;=&amp;nbsp;(\nabla_WL)A^T$&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style4&quot; /&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;SGD 과정을 보면 이와 같습니다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;$A_{t+1}&amp;nbsp;&amp;larr;A_t&amp;nbsp;&amp;minus;&amp;eta;B_t^⊤(&amp;nabla;_WL_t)$&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;$B_{t+1}&amp;nbsp;&amp;larr;B_t&amp;nbsp;&amp;minus;&amp;eta;(&amp;nabla;_WL_t)A^⊤_t&amp;nbsp;$&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;이 때 W의 Gradient인 $&amp;nabla;_WL_t $의 프로베니우스 놈이 $L$보다 작거나 같다고 가정합니다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;즉, Model이 Finite &lt;span style=&quot;font-family: -apple-system, BlinkMacSystemFont, 'Helvetica Neue', 'Apple SD Gothic Neo', Arial, sans-serif; letter-spacing: 0px;&quot;&gt;Euclidean ball에 존재한다고 가정합니다.&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: -apple-system, BlinkMacSystemFont, 'Helvetica Neue', 'Apple SD Gothic Neo', Arial, sans-serif; letter-spacing: 0px;&quot;&gt;그러면 $A_t$, $B_t$의 Dynamics는 다음과 같아집니다.&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: -apple-system, BlinkMacSystemFont, 'Helvetica Neue', 'Apple SD Gothic Neo', Arial, sans-serif; letter-spacing: 0px;&quot;&gt;$A_T = A_0+&amp;eta;A_0f_A(T) $&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: -apple-system, BlinkMacSystemFont, 'Helvetica Neue', 'Apple SD Gothic Neo', Arial, sans-serif; letter-spacing: 0px;&quot;&gt;$B_T&amp;nbsp;=&amp;eta;f_B(T)A_0^⊤$&lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style4&quot; /&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: -apple-system, BlinkMacSystemFont, 'Helvetica Neue', 'Apple SD Gothic Neo', Arial, sans-serif; letter-spacing: 0px;&quot;&gt;그리고 LoRA Update의 Dynamics를 살펴보겠습니다.&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;$W + (B_0 + ∆B)(A_0 + ∆A)$&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;$= W + B_0 A_0+ B_0∆A + ∆BA_0+ ∆B∆A $&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;$= W + ∆BA_0 + ∆B∆A$&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;$B_0$는 0으로 초기화 했고, Learning Rate &amp;eta;가 충분히 작다면 다음처럼 정리됩니다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;$W + (B_0 + ∆B)(A_0 + ∆A)$ &amp;asymp; $W + ∆BA_0$&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;color: #333333; text-align: start;&quot;&gt;➡ &lt;/span&gt;$W + &amp;eta; \hat{f_B}(T)A_0^⊤A_0$&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;이 때 $\hat{f_B}$는 다음처럼 Update 됩니다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;$\hat{f_B}(t+1):=&amp;nbsp;\hat{f_B}(t)&amp;minus;&amp;nabla;_WL_t$&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style4&quot; /&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;최종적으로 다음과 같습니다&lt;/p&gt;
&lt;blockquote data-ke-style=&quot;style3&quot;&gt;&lt;span style=&quot;color: #f89009;&quot;&gt;$W&amp;minus;&amp;eta;&amp;sum;^T_{t=0}[(&amp;nabla;_WL_t)A^T_0A_0]$&lt;/span&gt;&lt;/blockquote&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;b&gt;즉 LoRA 학습은,&lt;/b&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;b&gt;W의 Gradient인 $&amp;nabla;_WL_t$&amp;nbsp;를 $A^T_0$&amp;nbsp;로 Down-projection해서 Compress하고, &lt;/b&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;b&gt;$A_0$로 Up-projection해서 Decompress하는 과정입니다.&lt;/b&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style6&quot; /&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;제가 23년 초에 처음 LoRA를 봤을 때는 인용수가 약 500회 정도였는데&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;24년 4월 현재 약 3400회가 됐네요.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;LoRA가 얼마나 혁신적이고 효과적이었으며, 파고들 부분이 많았다는 것을 알 수 있습니다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;아쉬움이 많이 남습니다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;추후에 그동안 해왔던 연구 과정을 가설, 실험 등을 포함하여 업로드 해볼까 합니다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;연구...어렵다...&lt;/p&gt;</description>
      <category>Paper Review/PEFT</category>
      <category>LLM</category>
      <category>Lora</category>
      <category>peft</category>
      <author>quasar529</author>
      <guid isPermaLink="true">https://quasar529.tistory.com/5</guid>
      <comments>https://quasar529.tistory.com/5#entry5comment</comments>
      <pubDate>Sun, 14 Apr 2024 20:12:03 +0900</pubDate>
    </item>
    <item>
      <title>[Paper Briefing] 2403. LoRA-SP / AutoLoRA</title>
      <link>https://quasar529.tistory.com/4</link>
      <description>&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Nanum Gothic';&quot;&gt;24년 3월에 arXiv에 올라온 두 편의 LoRA 관련 논문을 살펴보겠습니다.&lt;/span&gt;&lt;/p&gt;
&lt;hr data-ke-style=&quot;style1&quot; /&gt;
&lt;h4 data-ke-size=&quot;size20&quot;&gt;&lt;span style=&quot;font-family: 'Nanum Gothic';&quot;&gt;LoRA-SP: Streamlined Partial Parameter Adaptation for Resource- Efficient Fine-Tuning of Large Language Models&lt;/span&gt;&lt;/h4&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock widthContent&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;2000&quot; data-origin-height=&quot;1267&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bdbI7f/btsF2QP5oFM/wkH33PN2poFyN0SGpdNV20/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bdbI7f/btsF2QP5oFM/wkH33PN2poFyN0SGpdNV20/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bdbI7f/btsF2QP5oFM/wkH33PN2poFyN0SGpdNV20/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbdbI7f%2FbtsF2QP5oFM%2FwkH33PN2poFyN0SGpdNV20%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;2000&quot; height=&quot;1267&quot; data-origin-width=&quot;2000&quot; data-origin-height=&quot;1267&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;span style=&quot;font-family: 'Nanum Gothic';&quot;&gt;23년 발표된 논문인 &lt;a href=&quot;https://openreview.net/forum?id=RbKThNNFxr&amp;amp;referrer=%5Bthe%20profile%20of%20Xiaowen%20Chu%5D(%2Fprofile%3Fid%3D~Xiaowen_Chu2)&quot;&gt;LoRA-FA&lt;/a&gt;&lt;/span&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;span style=&quot;font-family: 'Nanum Gothic';&quot;&gt;LoRA 학습 시 A는 Freeze 하고 B만 학습 시키는 것이 A,B 둘 다 학습시키는 것과 Comparable한 성능을 보인다고 주장&lt;/span&gt;&lt;/li&gt;
&lt;li&gt;&lt;span style=&quot;font-family: 'Nanum Gothic';&quot;&gt;단, 본 논문은 Contribution이 약하다(memory saving이 크지 않다 등)는 이유로 ICLR에서 Reject&lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Nanum Gothic';&quot;&gt;➡️ 즉 LoRA도 충분히 작지만, 여기서도 Redundancy가 존재한다고 볼 수 있음&lt;/span&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;&lt;span style=&quot;font-family: 'Nanum Gothic';&quot;&gt;LoRA-SP도 일부를 Freeze하는 비슷한 방법을 제시&lt;/span&gt;&lt;/li&gt;
&lt;li&gt;&lt;span style=&quot;font-family: 'Nanum Gothic';&quot;&gt;&lt;b&gt;Binary Matrix S&lt;/b&gt; 를 도입해 A,B 모두 &lt;b&gt;절반만 Update&lt;/b&gt;&lt;/span&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;span style=&quot;font-family: 'Nanum Gothic';&quot;&gt;이때 S는 Random&lt;/span&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;&lt;span style=&quot;font-family: 'Nanum Gothic';&quot;&gt;$\Delta W = (A⨀S)(B⨀S)^T$&lt;/span&gt;&lt;/li&gt;
&lt;li&gt;&lt;span style=&quot;font-family: 'Nanum Gothic';&quot;&gt;추가적으로 efficiency를 위해 &lt;i&gt;Quantization&lt;/i&gt;, &lt;i&gt;Selective Activation Recomputation&lt;/i&gt; 사용&lt;/span&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;span style=&quot;font-family: 'Nanum Gothic';&quot;&gt;&lt;a href=&quot;https://arxiv.org/abs/2205.05198&quot;&gt;Selective Activation Recomputation&lt;/a&gt; 이란 backward 시 필요한 activation만 계산함으로써 memory utilization을 최적화하는 기법&lt;/span&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock widthContent&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-filename=&quot;스크린샷 2024-03-24 오후 9.52.26.png&quot; data-origin-width=&quot;1764&quot; data-origin-height=&quot;684&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/XTEP1/btsF3ntn0s9/VxwEWkArM4VOGytKkrImOk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/XTEP1/btsF3ntn0s9/VxwEWkArM4VOGytKkrImOk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/XTEP1/btsF3ntn0s9/VxwEWkArM4VOGytKkrImOk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FXTEP1%2FbtsF3ntn0s9%2FVxwEWkArM4VOGytKkrImOk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1764&quot; height=&quot;684&quot; data-filename=&quot;스크린샷 2024-03-24 오후 9.52.26.png&quot; data-origin-width=&quot;1764&quot; data-origin-height=&quot;684&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;span style=&quot;font-family: 'Nanum Gothic';&quot;&gt;LoRA보다 Parameters수가 정확히 절반만큼 감소하지만 성능은 비슷&lt;/span&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Nanum Gothic';&quot;&gt;개인적인 경험으론 LoRA는 쉬운 task에서는 어떻게 변형해도 실험결과가 대동소이하게 잘 나오는 경우가 많은데,&lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Nanum Gothic';&quot;&gt;본 논문도 여러 실험을 하다가 얻어 걸려서&lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Nanum Gothic';&quot;&gt;무지성 아카이브 업로드를 한 것이 아닌가라는 생각이 듭니다.&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Nanum Gothic';&quot;&gt;S가 어떤 기준에 따라 결정되는 것도 아니고,&lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Nanum Gothic';&quot;&gt;중간에 논리 전개 과정에 맞지 않는 Quantization, Selective Activation Recomputation을 &lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Nanum Gothic';&quot;&gt;뜬금없이 사용해서 더욱 그렇습니다.&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Nanum Gothic';&quot;&gt;또한 가장 강조하는 것이 enhancing computational efficiency, reducing memory usage인데,&lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Nanum Gothic';&quot;&gt;정작 메모리 사용량에 대한 구체적인 언급이 없는 것이 결정적입니다.&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Nanum Gothic';&quot;&gt;물론 제가 인비져블 썸띵을 보지 못한 것일 수도 있지만...&lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Nanum Gothic';&quot;&gt;잘 모르겠네요.  &lt;/span&gt;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style8&quot; /&gt;
&lt;h4 data-ke-size=&quot;size20&quot;&gt;&lt;span style=&quot;font-family: 'Nanum Gothic';&quot;&gt;AutoLoRA: Automatically Tuning Matrix Ranks in Low-Rank Adaptation Based on Meta Learning&lt;/span&gt;&lt;/h4&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock widthContent&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;2000&quot; data-origin-height=&quot;1175&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/zDj6B/btsF17ELgXy/ofL3EWL89ofl04Bya1Ri91/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/zDj6B/btsF17ELgXy/ofL3EWL89ofl04Bya1Ri91/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/zDj6B/btsF17ELgXy/ofL3EWL89ofl04Bya1Ri91/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FzDj6B%2FbtsF17ELgXy%2FofL3EWL89ofl04Bya1Ri91%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;2000&quot; height=&quot;1175&quot; data-origin-width=&quot;2000&quot; data-origin-height=&quot;1175&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;span style=&quot;font-family: 'Nanum Gothic';&quot;&gt;&lt;a href=&quot;https://openreview.net/forum?id=lq62uWRJjiY&quot;&gt;AdaLoRA&lt;/a&gt;&lt;/span&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;span style=&quot;font-family: 'Nanum Gothic';&quot;&gt;Layer 별로 Task에 따라 중요도가 달라지기 때문에, 모두 동일한 rank를 주는 것은 비합리적&lt;/span&gt;&lt;/li&gt;
&lt;li&gt;&lt;span style=&quot;font-family: 'Nanum Gothic';&quot;&gt;Importance Score에 따라 정해진 Budget하에 LoRA Rank를 다르게 부여하는 방법을 제시&lt;/span&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;&lt;span style=&quot;font-family: 'Nanum Gothic';&quot;&gt;Meta Learning을 통해 Layer 별 최적 LoRA Rank를 찾는다&lt;/span&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;span style=&quot;font-family: 'Nanum Gothic';&quot;&gt;Meta Learning이 사용됐다고 말하는 이유는, 적절한 Rank를 찾는 Selection variables을 학습하는 과정이 포함됐기 때문&lt;/span&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;&lt;span style=&quot;font-family: 'Nanum Gothic';&quot;&gt;Train Dataset을 Train/Valid 로 나눔&lt;/span&gt;&lt;/li&gt;
&lt;li&gt;&lt;span style=&quot;font-family: 'Nanum Gothic';&quot;&gt;train dataset으로 U,V를 optimize하고 valid dataset으론 selection variables을 optimize&lt;/span&gt;&lt;/li&gt;
&lt;li&gt;&lt;span style=&quot;font-family: 'Nanum Gothic';&quot;&gt;본 논문이 AdaLoRA에 대해 갖는 차별점&lt;/span&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;span style=&quot;font-family: 'Nanum Gothic';&quot;&gt;Importance score 와 Update Matrices가 모두 같은 dataset에 대해 학습하기 때문에 Overfitting 가능성 있다고 주장&lt;/span&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock widthContent&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;2000&quot; data-origin-height=&quot;646&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/b00qFm/btsF1R915r0/KHHOGAbA4LCu6GMLUyuig0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/b00qFm/btsF1R915r0/KHHOGAbA4LCu6GMLUyuig0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/b00qFm/btsF1R915r0/KHHOGAbA4LCu6GMLUyuig0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fb00qFm%2FbtsF1R915r0%2FKHHOGAbA4LCu6GMLUyuig0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;2000&quot; height=&quot;646&quot; data-origin-width=&quot;2000&quot; data-origin-height=&quot;646&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;figure class=&quot;imageblock widthContent&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;2000&quot; data-origin-height=&quot;690&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bq7QYK/btsF2wj8t8u/OegI75EQcRBMRznasFxIo0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bq7QYK/btsF2wj8t8u/OegI75EQcRBMRznasFxIo0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bq7QYK/btsF2wj8t8u/OegI75EQcRBMRznasFxIo0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fbq7QYK%2FbtsF2wj8t8u%2FOegI75EQcRBMRznasFxIo0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;2000&quot; height=&quot;690&quot; data-origin-width=&quot;2000&quot; data-origin-height=&quot;690&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;span style=&quot;font-family: 'Nanum Gothic';&quot;&gt;AdaLoRA와 동일한 Param을 갖지만 더 좋은 성능을 낸다고 주장&lt;/span&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock widthContent&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;2000&quot; data-origin-height=&quot;843&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/wMTba/btsF3epJwsM/QbImt16cu3BnNL7siaSQkK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/wMTba/btsF3epJwsM/QbImt16cu3BnNL7siaSQkK/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/wMTba/btsF3epJwsM/QbImt16cu3BnNL7siaSQkK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FwMTba%2FbtsF3epJwsM%2FQbImt16cu3BnNL7siaSQkK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;2000&quot; height=&quot;843&quot; data-origin-width=&quot;2000&quot; data-origin-height=&quot;843&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;span style=&quot;font-family: 'Nanum Gothic';&quot;&gt;단, 더 높은 Cost 소요&lt;/span&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Nanum Gothic';&quot;&gt;AdaLoRA는 Layer 별 Optimal Rank를 동적으로 찾는 방법을 발견함으로써&lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Nanum Gothic';&quot;&gt;LoRA가 더 효율적일 수 있다라는 가설을 제시하고 증명했습니다.&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Nanum Gothic';&quot;&gt;이후에 일괄적인 Rank 할당이 아닌 Dynamic하게 Rank를 할당하는 여러 방법이 등장하는데 (ex. &lt;a href=&quot;https://aclanthology.org/2023.eacl-main.239&quot;&gt;DyLoRA&lt;/a&gt;)&lt;/span&gt;&lt;br /&gt;&lt;span style=&quot;font-family: 'Nanum Gothic';&quot;&gt;AutoLoRA도 그 중 하나입니다.&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Nanum Gothic';&quot;&gt;그러나 AdaLoRA보다 Outperform(사실 그냥 또이또이...) 하다고 하지만 Cost가 거의 2배여서 의미가 있나 싶고,&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Nanum Gothic';&quot;&gt;Optimal Rank를 찾을 때 Dataset을 train/valid로 나누는 것이 Resilient하다고 주장하지만&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Nanum Gothic';&quot;&gt;저는 오히려 같은 Dataset에 대해 찾는 것이 더 적절하다고 생각합니다.&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Nanum Gothic';&quot;&gt;이 논문 역시 AdaLoRA와 비교했을 때 달라지거나 크게 새로운 것을 제시하지 않아&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: 'Nanum Gothic';&quot;&gt;큰 의미가 있진 않다고 생각합니다.&lt;/span&gt;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;hr contenteditable=&quot;false&quot; data-ke-type=&quot;horizontalRule&quot; data-ke-style=&quot;style2&quot; /&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;</description>
      <category>Paper Review/PEFT</category>
      <author>quasar529</author>
      <guid isPermaLink="true">https://quasar529.tistory.com/4</guid>
      <comments>https://quasar529.tistory.com/4#entry4comment</comments>
      <pubDate>Sun, 24 Mar 2024 22:51:18 +0900</pubDate>
    </item>
    <item>
      <title>BERT : Bidirectional Encoder Representations from Transformers</title>
      <link>https://quasar529.tistory.com/3</link>
      <description>&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;2000&quot; data-origin-height=&quot;1261&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bZgGOa/btsFF1Sekbm/s77XUWsYvIVPrrRs5q55Dk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bZgGOa/btsFF1Sekbm/s77XUWsYvIVPrrRs5q55Dk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bZgGOa/btsFF1Sekbm/s77XUWsYvIVPrrRs5q55Dk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbZgGOa%2FbtsFF1Sekbm%2Fs77XUWsYvIVPrrRs5q55Dk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;2000&quot; height=&quot;1261&quot; data-origin-width=&quot;2000&quot; data-origin-height=&quot;1261&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;Input Representation&lt;/h3&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;b&gt;Token Embeddings&lt;/b&gt;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;WordPiece 토크나이저를 사용하여 문장을 토큰으로 분해
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;바이트 페어 인코딩(Byte Pair Encoding, BPE)의 유사 알고리즘&lt;/li&gt;
&lt;li&gt;흔한 단어를 그대로 유지하고, 흔하지 않은 단어는 부분 단어(subword)로 분해&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;sentence의 첫번째 token은 언제나&amp;nbsp;[CLS] (special classification token)
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;여기에&amp;nbsp;간단한 classifier를 붙이면 단일 문장, 또는 연속된 문장 분류 가능&lt;/li&gt;
&lt;li&gt;분류 작업 안하면 무시&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;문장의 구분을 위해 문장의 끝에 [SEP] 토큰을 사용&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;b&gt;Segment Embedding&lt;/b&gt;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;문장 A와 문장 B를 구분하고, 각 문장의 시작과 끝을 알려주는 방법
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;첫 번째 문장의 모든 토큰에는 'A' 임베딩을 부여하고, 두 번째 문장의 모든 토큰에는 'B' 임베딩을 부여&lt;/li&gt;
&lt;li&gt;첫 번째 문장이 끝나고 두 번째 문장이 시작되는 지점에는 [SEP] 토큰이 삽입됨 &amp;rarr; BERT는 문장의 시작과 끝, 그리고 문장 사이의 경계를 인식&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;b&gt;Position Embedding&lt;/b&gt;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;토큰의 순서 정보 반영&lt;/li&gt;
&lt;/ul&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;&lt;b&gt;Masked Language Model (MLM)&lt;/b&gt;&lt;/h3&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock widthContent&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;2000&quot; data-origin-height=&quot;1356&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bMV0pB/btsFF4BoHEr/fyij9J68dFjxwuZkdE42Zk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bMV0pB/btsFF4BoHEr/fyij9J68dFjxwuZkdE42Zk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bMV0pB/btsFF4BoHEr/fyij9J68dFjxwuZkdE42Zk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbMV0pB%2FbtsFF4BoHEr%2Ffyij9J68dFjxwuZkdE42Zk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;2000&quot; height=&quot;1356&quot; data-origin-width=&quot;2000&quot; data-origin-height=&quot;1356&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;일부 단어를 가려서(masking) 모델이 그 가려진 단어를 예측하도록 하는 방식으로 작동 &amp;rarr; 이를 통해 모델은 양쪽 방향의 문맥을 모두 고려&lt;/li&gt;
&lt;li&gt;먼저 단어 중의 일부를&amp;nbsp;[MASK]&amp;nbsp;token 으로 바꾼다
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;바꾸는 비율은&amp;nbsp;&lt;b&gt;15%&lt;/b&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;80% : token을&amp;nbsp;&lt;b&gt;[MASK] token으로&lt;/b&gt;.&amp;nbsp;ex)&amp;nbsp;my dog is&amp;nbsp;hairy&amp;nbsp;-&amp;gt; my dog is&amp;nbsp;[MASK]&lt;/li&gt;
&lt;li&gt;10% : token을&amp;nbsp;&lt;b&gt;random word로&lt;/b&gt;.&amp;nbsp;ex)&amp;nbsp;my dog is&amp;nbsp;hairy&amp;nbsp;-&amp;gt; my dog is&amp;nbsp;apple
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;실제 비율은 1.5% 밖에 되지 않아 모델의 성능에 크게 영향 없음&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;10% : token을&amp;nbsp;&lt;b&gt;원래 단어 그대로&lt;/b&gt;&amp;nbsp;.&amp;nbsp;ex)&amp;nbsp;my dog is&amp;nbsp;hairy&amp;nbsp;-&amp;gt; my dog is&amp;nbsp;hairy
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;실제 관측 단어에 대한 representation을 bias&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;LM의&amp;nbsp;&lt;b&gt;left-to-right&lt;/b&gt;&amp;nbsp;(혹은 r2l)을 통하여 문장 전체를&amp;nbsp;predict하는 방법론과는 달리,&amp;nbsp;[MASK]&amp;nbsp;token 만을&amp;nbsp;predict
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;[MASK] token이 cross entropy loss를 통해 원래 token을 예측&lt;/li&gt;
&lt;li&gt;[MASK]&amp;nbsp;token은&amp;nbsp;&lt;b&gt;pre-training&lt;/b&gt;에만 사용되고,&amp;nbsp;&lt;b&gt;fine-tuning&lt;/b&gt;시에는 사용되지 않음 &amp;rarr; 왜냐하면&amp;nbsp;&lt;b&gt;[MASK] token이 fine-tuning과정에서는 나타나지 않기 때문&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;&lt;b&gt;Next Sentence Prediction (NSP)&lt;/b&gt;&lt;/h3&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;입력으로 두개의 문장을 받아 두 번째 문장이 첫 번째 문장의 다음에 오는 문장인지를 맞추는 Binary Classification을 학습
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;QA&lt;/b&gt;나&amp;nbsp;&lt;b&gt;Natural Language Inference&lt;/b&gt;(&lt;b&gt;NLI&lt;/b&gt;)와 같이 두 문장 사이의 관계를 이해하는 것이 중요하기 때문&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;작동 원리
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;'A'와 'B' 두 가지 유형의 문장&lt;/li&gt;
&lt;li&gt;'A' 문장은 원래의 텍스트에서 가져온 문장이고, 'B' 문장은 'A' 문장 다음에 오는 문장일 수도 있고, 전혀 관련이 없는 무작위의 문장일 수도 있다&lt;/li&gt;
&lt;li&gt;&lt;b&gt;'B' 문장이 'A' 문장 바로 다음에 오는 문장 : 'IsNext'&lt;/b&gt;&lt;/li&gt;
&lt;li&gt;&lt;b&gt;만약 'B' 문장이 'A' 문장과 관련이 없는 무작위의 문장: 'NotNext'&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;두 문장 사이의 관계를 예측하여 문장 간의 관계를 이해하고, 문장의 순서와 문맥을 파악하는 능력을 향상
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;50% : sentence A, B가 실제 next sentence&lt;/li&gt;
&lt;li&gt;50% : sentence A, B가 corpus에서 random으로 뽑힌(관계가 없는) 두 문장
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;예시&lt;/li&gt;
&lt;li&gt;Input =&amp;nbsp;[CLS] the man went to [MASK] store [SEP] he bought a gallon [MASK] milk [SEP]
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;Label =&amp;nbsp;IsNext&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;Input =&amp;nbsp;[CLS] the man [MASK] to the store [SEP] penguin [MASK] are flight ##less birds [SEP]
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;Label =&amp;nbsp;NotNext&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;2000&quot; data-origin-height=&quot;792&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/cbnXjc/btsFGjL9xPS/BmxGknlbKKU27a8BP91ap1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/cbnXjc/btsFGjL9xPS/BmxGknlbKKU27a8BP91ap1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/cbnXjc/btsFGjL9xPS/BmxGknlbKKU27a8BP91ap1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FcbnXjc%2FbtsFGjL9xPS%2FBmxGknlbKKU27a8BP91ap1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;2000&quot; height=&quot;792&quot; data-origin-width=&quot;2000&quot; data-origin-height=&quot;792&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;C 토큰은&amp;nbsp;next sentence prediction(NSP)을 위한 토큰&lt;/li&gt;
&lt;li&gt;이&amp;nbsp;토큰 C를 이용하여&amp;nbsp;input으로 들어온 두 문장이 원래 corpus에서&amp;nbsp;이어 붙여져 있던 문장인지(IsNext) 아닌지(NotNext)를 맞춰가며 학습&lt;/li&gt;
&lt;/ul&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;✔ Masked LM은 숨겨진 단어를 예측하는 문제 &amp;rarr; 단어에 초점&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;✔ Next Sentence Prediction 문제는 두 문장의 관계를 파악해야 하는 문제 &amp;rarr; 문장에 초점, 더욱 넓은 범위의 이해를 요구&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;rArr; 상호 보완적인 두 가지 Pretraining 방법을 동시에 사용하여 더욱 다채로운 성능 가진다&lt;/p&gt;
&lt;h3 data-ke-size=&quot;size23&quot;&gt;&lt;b&gt;Fine-tuning Procedure&lt;/b&gt;&lt;/h3&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;&lt;b&gt;sequence-level classification tasks&lt;/b&gt;
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;input sequence에 대해서 일정한 차원수의 representation 결과를 얻는다 &amp;rarr; [CLS]&amp;nbsp;token의&amp;nbsp;&lt;b&gt;Transformer output&lt;/b&gt;값을 사용
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;$C \in \mathbb{ R }^H$&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;classify하고 싶은 개수(K)에 따라 classification layer 붙인다
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;$W \in \mathbb{ R }^{K \times H}$&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;&lt;b&gt;span-level, token-level prediction tasks&lt;/b&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock widthContent&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;2000&quot; data-origin-height=&quot;1902&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/nf2GP/btsFD3Q2j4W/ArlCgOIhNChp7iV6efpQTK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/nf2GP/btsFD3Q2j4W/ArlCgOIhNChp7iV6efpQTK/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/nf2GP/btsFD3Q2j4W/ArlCgOIhNChp7iV6efpQTK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fnf2GP%2FbtsFD3Q2j4W%2FArlCgOIhNChp7iV6efpQTK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;2000&quot; height=&quot;1902&quot; data-origin-width=&quot;2000&quot; data-origin-height=&quot;1902&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;</description>
      <category>Archiving/Model</category>
      <category>NLP</category>
      <author>quasar529</author>
      <guid isPermaLink="true">https://quasar529.tistory.com/3</guid>
      <comments>https://quasar529.tistory.com/3#entry3comment</comments>
      <pubDate>Sun, 10 Mar 2024 19:27:39 +0900</pubDate>
    </item>
    <item>
      <title>HiFi: High-Information Attention Heads Hold for Parameter-Efficient Model Adaptation</title>
      <link>https://quasar529.tistory.com/2</link>
      <description>&lt;p data-ke-size=&quot;size16&quot;&gt;&lt;span style=&quot;font-family: AppleSDGothicNeo-Regular, 'Malgun Gothic', '맑은 고딕', dotum, 돋움, sans-serif;&quot;&gt;LLM (본 논문에서는 PLMs)은 large scale of parameters 가진다&lt;/span&gt;&lt;br /&gt;&lt;b&gt;➡ Data-Scarce &amp;amp; Resource-Limited 상황에서 Inefficient&lt;br /&gt;&lt;/b&gt;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc; background-color: #ffffff; color: #212529; text-align: start;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;Catastrophic forgetting issues&lt;/li&gt;
&lt;li&gt;Limited storage infrastructure&lt;br /&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1280&quot; data-origin-height=&quot;826&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/CT9RI/btsFHnGZ25I/WSK10K1ZxsfkeFpeIHAsEk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/CT9RI/btsFHnGZ25I/WSK10K1ZxsfkeFpeIHAsEk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/CT9RI/btsFHnGZ25I/WSK10K1ZxsfkeFpeIHAsEk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FCT9RI%2FbtsFHnGZ25I%2FWSK10K1ZxsfkeFpeIHAsEk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1280&quot; height=&quot;826&quot; data-origin-width=&quot;1280&quot; data-origin-height=&quot;826&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;hr data-ke-style=&quot;style1&quot; /&gt;
&lt;p style=&quot;background-color: #ffffff; color: #212529; text-align: start;&quot; data-ke-size=&quot;size16&quot;&gt;PEFT 등장&lt;br /&gt;&lt;b&gt;Only fine-tunes the minority of the original parameters&lt;br /&gt;&lt;/b&gt;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc; background-color: #ffffff; color: #212529; text-align: start;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;Effectively decrease parameters&lt;/li&gt;
&lt;li&gt;BUT also leads to concerns
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;Breaks the model structure&lt;/li&gt;
&lt;li&gt;Inference delays&lt;br /&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1280&quot; data-origin-height=&quot;665&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/xQacj/btsFGBeJBkJ/xiLhttMeojRnIoZVYJI1WK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/xQacj/btsFGBeJBkJ/xiLhttMeojRnIoZVYJI1WK/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/xQacj/btsFGBeJBkJ/xiLhttMeojRnIoZVYJI1WK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FxQacj%2FbtsFGBeJBkJ%2FxiLhttMeojRnIoZVYJI1WK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1280&quot; height=&quot;665&quot; data-origin-width=&quot;1280&quot; data-origin-height=&quot;665&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;hr data-ke-style=&quot;style1&quot; /&gt;
&lt;p style=&quot;background-color: #ffffff; color: #212529; text-align: start;&quot; data-ke-size=&quot;size16&quot;&gt;Two types of Methods&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc; background-color: #ffffff; color: #212529; text-align: start;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;Structured Methods
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;Extra introduced blocks : LoRA, Prompt-Tuning&lt;/li&gt;
&lt;li&gt;Internal original blocks : BitFit&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;Non-structured Methods&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1274&quot; data-origin-height=&quot;1286&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/lg7kc/btsFIiS2HCw/5Z4YD2cRb9iEGeW8J46gKK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/lg7kc/btsFIiS2HCw/5Z4YD2cRb9iEGeW8J46gKK/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/lg7kc/btsFIiS2HCw/5Z4YD2cRb9iEGeW8J46gKK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Flg7kc%2FbtsFIiS2HCw%2F5Z4YD2cRb9iEGeW8J46gKK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1274&quot; height=&quot;1286&quot; data-origin-width=&quot;1274&quot; data-origin-height=&quot;1286&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr data-ke-style=&quot;style1&quot; /&gt;
&lt;p style=&quot;background-color: #ffffff; color: #212529; text-align: start;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;b&gt;HiFi&lt;/b&gt;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc; background-color: #ffffff; color: #212529; text-align: start;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;Fine-tuning the relatively significant heads in MHA(multi-head attention module)
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;= Highly informative and Strongly correlated attention heads&lt;/li&gt;
&lt;li&gt;이유 : LLM 대부분 Transformer 기반 &amp;amp; MHA plays a crucial role&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;Two Big Challenges
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;How to measure the individual importance of a head?&lt;/li&gt;
&lt;li&gt;How to measure the relative importance between heads?&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;hr data-ke-style=&quot;style1&quot; /&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1280&quot; data-origin-height=&quot;442&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/SLSF3/btsFEOfha57/zlxYcW6LUG5MIgZzKZ8bcK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/SLSF3/btsFEOfha57/zlxYcW6LUG5MIgZzKZ8bcK/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/SLSF3/btsFEOfha57/zlxYcW6LUG5MIgZzKZ8bcK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FSLSF3%2FbtsFEOfha57%2FzlxYcW6LUG5MIgZzKZ8bcK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1280&quot; height=&quot;442&quot; data-origin-width=&quot;1280&quot; data-origin-height=&quot;442&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;h3 id=&quot;information-richness&quot; style=&quot;background-color: #ffffff; color: #212529; text-align: start;&quot; data-ke-size=&quot;size23&quot;&gt;Information Richness&lt;/h3&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1280&quot; data-origin-height=&quot;203&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/SJHSA/btsFGSge63W/8qWosH3mY6V1LUNzXnaCEk/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/SJHSA/btsFGSge63W/8qWosH3mY6V1LUNzXnaCEk/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/SJHSA/btsFGSge63W/8qWosH3mY6V1LUNzXnaCEk/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FSJHSA%2FbtsFGSge63W%2F8qWosH3mY6V1LUNzXnaCEk%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1280&quot; height=&quot;203&quot; data-origin-width=&quot;1280&quot; data-origin-height=&quot;203&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc; background-color: #ffffff; color: #212529; text-align: start;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;$W_h$ ➡️ $O_h$ 근사&lt;/li&gt;
&lt;li&gt;$O_h(x)$ SVD
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;Singular values $\{\sigma_t\}$ decay slower == informative &amp;amp; contains more meaningful principal components&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;Information richness of an attention head as $I_h(W_h \mid x)$&lt;/li&gt;
&lt;li&gt;Monte-Carlo
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;Stable results can be obtained using a small n (e.g., 300)&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;h3 id=&quot;correlation&quot; style=&quot;background-color: #ffffff; color: #212529; text-align: start;&quot; data-ke-size=&quot;size23&quot;&gt;Correlation&lt;/h3&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1280&quot; data-origin-height=&quot;260&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/bx8WRX/btsFELv9lMb/05ank7c92xOwn4NxlpXbg0/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/bx8WRX/btsFELv9lMb/05ank7c92xOwn4NxlpXbg0/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/bx8WRX/btsFELv9lMb/05ank7c92xOwn4NxlpXbg0/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2Fbx8WRX%2FbtsFELv9lMb%2F05ank7c92xOwn4NxlpXbg0%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1280&quot; height=&quot;260&quot; data-origin-width=&quot;1280&quot; data-origin-height=&quot;260&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc; background-color: #ffffff; color: #212529; text-align: start;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;Weights 간 Correlation ➡️ Outputs 간 Correlation으로 근사&lt;/li&gt;
&lt;li&gt;$O'_h$ = average of $O_h$ over the sequence axis&lt;/li&gt;
&lt;li&gt;Correlation between two heads $(h, h')$ is computed by the covariance
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;strong positive and negative should be considered equally ➡️ 절댓값&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;li&gt;Unbiased estimation of covariance (불편추정량)&lt;/li&gt;
&lt;li&gt;Monte-Carlo&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1280&quot; data-origin-height=&quot;438&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/clhU2J/btsFFY9js3n/zK0wpxasfCwiIG6Sq5ALkK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/clhU2J/btsFFY9js3n/zK0wpxasfCwiIG6Sq5ALkK/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/clhU2J/btsFFY9js3n/zK0wpxasfCwiIG6Sq5ALkK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FclhU2J%2FbtsFFY9js3n%2FzK0wpxasfCwiIG6Sq5ALkK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1280&quot; height=&quot;438&quot; data-origin-width=&quot;1280&quot; data-origin-height=&quot;438&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;h3 id=&quot;joint-optimization&quot; style=&quot;background-color: #ffffff; color: #212529; text-align: start;&quot; data-ke-size=&quot;size23&quot;&gt;Joint Optimization&lt;/h3&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1280&quot; data-origin-height=&quot;258&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/beQ9LC/btsFGKWLpw0/jmVrTwpgkUFZZqjdVK1ps1/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/beQ9LC/btsFGKWLpw0/jmVrTwpgkUFZZqjdVK1ps1/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/beQ9LC/btsFGKWLpw0/jmVrTwpgkUFZZqjdVK1ps1/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FbeQ9LC%2FbtsFGKWLpw0%2FjmVrTwpgkUFZZqjdVK1ps1%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1280&quot; height=&quot;258&quot; data-origin-width=&quot;1280&quot; data-origin-height=&quot;258&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc; background-color: #ffffff; color: #212529; text-align: start;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;Heads into directed fully-connected graph&lt;/li&gt;
&lt;li&gt;$p_h^{(0)}$ = Initial probability per node&lt;/li&gt;
&lt;li&gt;$m_{h,h'}$ = Probability of moving from node $h$ to another node $h'$&lt;/li&gt;
&lt;li&gt;$P^{(0)} = [p_1^{(0)}, p_2^{(0)}, \cdots, p_H^{(0)}]^\top$ : Probability vector&lt;/li&gt;
&lt;li&gt;State transition probability matrix $M = [m_{h,h'}]_{H \times H}$&lt;/li&gt;
&lt;li&gt;PageRank
&lt;ul style=&quot;list-style-type: disc;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;d = damping factor&lt;br /&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1016&quot; data-origin-height=&quot;932&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/eQQ5Lv/btsFGVRyszL/gyieWset44cZhp0vafBh10/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/eQQ5Lv/btsFGVRyszL/gyieWset44cZhp0vafBh10/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/eQQ5Lv/btsFGVRyszL/gyieWset44cZhp0vafBh10/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FeQQ5Lv%2FbtsFGVRyszL%2FgyieWset44cZhp0vafBh10%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1016&quot; height=&quot;932&quot; data-origin-width=&quot;1016&quot; data-origin-height=&quot;932&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;/li&gt;
&lt;/ul&gt;
&lt;hr data-ke-style=&quot;style1&quot; /&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1280&quot; data-origin-height=&quot;897&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/sQTo8/btsFDB8hGNU/HChlCOdbZdeEFrmH1Ipg3K/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/sQTo8/btsFDB8hGNU/HChlCOdbZdeEFrmH1Ipg3K/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/sQTo8/btsFDB8hGNU/HChlCOdbZdeEFrmH1Ipg3K/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FsQTo8%2FbtsFDB8hGNU%2FHChlCOdbZdeEFrmH1Ipg3K%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1280&quot; height=&quot;897&quot; data-origin-width=&quot;1280&quot; data-origin-height=&quot;897&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;hr data-ke-style=&quot;style1&quot; /&gt;
&lt;p style=&quot;background-color: #ffffff; color: #212529; text-align: start;&quot; data-ke-size=&quot;size16&quot;&gt;&lt;b&gt;Ablation&lt;/b&gt;&lt;/p&gt;
&lt;ul style=&quot;list-style-type: disc; background-color: #ffffff; color: #212529; text-align: start;&quot; data-ke-list-type=&quot;disc&quot;&gt;
&lt;li&gt;Q1: Does the correlation $(r_{h,h'})$ between heads really matter?&lt;/li&gt;
&lt;li&gt;Q2: Are the heads with higher information richness ($I_h$) more important for the model?&lt;/li&gt;
&lt;li&gt;Q3: Is it enough to only take the correlation ($r_{h,h'}$) into consideration, while ignoring the information richness ($I_h$)?&lt;/li&gt;
&lt;li&gt;Q4: Does PageRank algorithm really work?&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock alignCenter&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;1280&quot; data-origin-height=&quot;300&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/ezEqNM/btsFF4OXNRP/7aiZOblkaosntbrwv5C6oK/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/ezEqNM/btsFF4OXNRP/7aiZOblkaosntbrwv5C6oK/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/ezEqNM/btsFF4OXNRP/7aiZOblkaosntbrwv5C6oK/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FezEqNM%2FbtsFF4OXNRP%2F7aiZOblkaosntbrwv5C6oK%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;1280&quot; height=&quot;300&quot; data-origin-width=&quot;1280&quot; data-origin-height=&quot;300&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;</description>
      <category>Paper Review/PEFT</category>
      <category>LLM</category>
      <category>peft</category>
      <author>quasar529</author>
      <guid isPermaLink="true">https://quasar529.tistory.com/2</guid>
      <comments>https://quasar529.tistory.com/2#entry2comment</comments>
      <pubDate>Sun, 10 Mar 2024 19:14:57 +0900</pubDate>
    </item>
    <item>
      <title>시작</title>
      <link>https://quasar529.tistory.com/1</link>
      <description>&lt;p data-ke-size=&quot;size16&quot;&gt;기술 블로그&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;이번엔 진짜입니다.&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;
&lt;p&gt;&lt;figure class=&quot;imageblock widthContent&quot; data-ke-mobileStyle=&quot;widthOrigin&quot; data-origin-width=&quot;2000&quot; data-origin-height=&quot;1448&quot;&gt;&lt;span data-url=&quot;https://blog.kakaocdn.net/dn/vCYGQ/btsFFmXcjLv/xrwcOhoSKIjOA1wxcJd931/img.png&quot; data-phocus=&quot;https://blog.kakaocdn.net/dn/vCYGQ/btsFFmXcjLv/xrwcOhoSKIjOA1wxcJd931/img.png&quot;&gt;&lt;img src=&quot;https://blog.kakaocdn.net/dn/vCYGQ/btsFFmXcjLv/xrwcOhoSKIjOA1wxcJd931/img.png&quot; srcset=&quot;https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&amp;fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FvCYGQ%2FbtsFFmXcjLv%2FxrwcOhoSKIjOA1wxcJd931%2Fimg.png&quot; onerror=&quot;this.onerror=null; this.src='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png'; this.srcset='//t1.daumcdn.net/tistory_admin/static/images/no-image-v1.png';&quot; loading=&quot;lazy&quot; width=&quot;2000&quot; height=&quot;1448&quot; data-origin-width=&quot;2000&quot; data-origin-height=&quot;1448&quot;/&gt;&lt;/span&gt;&lt;/figure&gt;
&lt;/p&gt;
&lt;p data-ke-size=&quot;size16&quot;&gt;&amp;nbsp;&lt;/p&gt;</description>
      <category>그냥</category>
      <category>시작</category>
      <author>quasar529</author>
      <guid isPermaLink="true">https://quasar529.tistory.com/1</guid>
      <comments>https://quasar529.tistory.com/1#entry1comment</comments>
      <pubDate>Sun, 10 Mar 2024 18:57:21 +0900</pubDate>
    </item>
  </channel>
</rss>